diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index d08651b..860604c 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -139,6 +139,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ orc_vectorization_ppd.q,\ parallel.q,\ ptf.q,\ + ptf_streaming.q,\ sample1.q,\ selectDistinctStar.q,\ script_env_var1.q,\ diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 842d150..8b70085 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -976,7 +976,9 @@ private int executeClientInternal(String commands) { } command = ""; } - + if (SessionState.get() != null) { + SessionState.get().setLastCommand(null); // reset + } return rc; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index abf32f1..57ce849 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; +import org.apache.hadoop.hive.ql.udf.ptf.Noop; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -266,26 +267,28 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, PTFDesc conf = op.getConf(); //Since we cannot know what columns will be needed by a PTF chain, //we do not prune columns on PTFOperator for PTF chains. - if (!conf.forWindowing()) { + if (!conf.forWindowing() && !Noop.class.isInstance(conf.getFuncDef().getTFunction())) { return super.process(nd, stack, cppCtx, nodeOutputs); } - - WindowTableFunctionDef def = (WindowTableFunctionDef) conf.getFuncDef(); - ArrayList sig = new ArrayList(); - List prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); - //we create a copy of prunedCols to create a list of pruned columns for PTFOperator - prunedCols = new ArrayList(prunedCols); - prunedColumnsList(prunedCols, def); RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); + + WindowTableFunctionDef def = null; + if (conf.forWindowing()) { + def = (WindowTableFunctionDef) conf.getFuncDef(); + prunedCols = prunedColumnsList(prunedCols, def); + } + ArrayList sig = new ArrayList(); RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig); - cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def)); cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newRR); op.getSchema().setSignature(sig); + + prunedCols = def == null ? prunedCols : prunedInputList(prunedCols, def); + cppCtx.getPrunedColLists().put(op, prunedCols); return null; } - private static RowResolver buildPrunedRR(List prunedCols, + private RowResolver buildPrunedRR(List prunedCols, RowResolver oldRR, ArrayList sig) throws SemanticException{ RowResolver newRR = new RowResolver(); HashSet prunedColsSet = new HashSet(prunedCols); @@ -302,7 +305,10 @@ private static RowResolver buildPrunedRR(List prunedCols, /* * add any input columns referenced in WindowFn args or expressions. */ - private void prunedColumnsList(List prunedCols, WindowTableFunctionDef tDef) { + private ArrayList prunedColumnsList(List prunedCols, + WindowTableFunctionDef tDef) { + //we create a copy of prunedCols to create a list of pruned columns for PTFOperator + ArrayList mergedColList = new ArrayList(prunedCols); if ( tDef.getWindowFunctions() != null ) { for(WindowFunctionDef wDef : tDef.getWindowFunctions() ) { if ( wDef.getArgs() == null) { @@ -310,22 +316,23 @@ private void prunedColumnsList(List prunedCols, WindowTableFunctionDef t } for(PTFExpressionDef arg : wDef.getArgs()) { ExprNodeDesc exprNode = arg.getExprNode(); - Utilities.mergeUniqElems(prunedCols, exprNode.getCols()); + Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } } if(tDef.getPartition() != null){ for(PTFExpressionDef col : tDef.getPartition().getExpressions()){ ExprNodeDesc exprNode = col.getExprNode(); - Utilities.mergeUniqElems(prunedCols, exprNode.getCols()); + Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } if(tDef.getOrder() != null){ for(PTFExpressionDef col : tDef.getOrder().getExpressions()){ ExprNodeDesc exprNode = col.getExprNode(); - Utilities.mergeUniqElems(prunedCols, exprNode.getCols()); + Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } + return mergedColList; } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java index fe698ef..4c2f42e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java @@ -99,7 +99,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private static void splitTask(SparkTask currentTask, ReduceWork reduceWork, ParseContext parseContext) throws SemanticException { SparkWork currentWork = currentTask.getWork(); - Set> reduceSinkSet = + Set> reduceSinkSet = SparkMapJoinResolver.getOp(reduceWork, ReduceSinkOperator.class); if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork) && reduceSinkSet.size() == 1) { diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java index ee7328e..7f26f0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java @@ -134,7 +134,9 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { topNodes.addAll(pGraphContext.getTopOps().values()); ogw.startWalking(topNodes, null); - LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); + if (LOG.isDebugEnabled()) { + LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); + } return pGraphContext; } diff --git ql/src/test/queries/clientpositive/ptf.q ql/src/test/queries/clientpositive/ptf.q index 56eef0a..394c227 100644 --- ql/src/test/queries/clientpositive/ptf.q +++ ql/src/test/queries/clientpositive/ptf.q @@ -1,6 +1,16 @@ -- SORT_QUERY_RESULTS --1. test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -11,6 +21,14 @@ from noop(on part ); -- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +; + select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j @@ -19,12 +37,28 @@ sort by j.p_name) ; -- 3. testOnlyPTF +explain +select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name); + select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name); -- 4. testPTFAlias +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -35,6 +69,17 @@ from noop(on part ) abc; -- 5. testPTFAndWhereWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -46,6 +91,18 @@ from noop(on part ; -- 6. testSWQAndPTFAndGBy +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -58,6 +115,13 @@ group by p_mfgr, p_name, p_size ; -- 7. testJoin +explain +select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey; + select abc.* from noop(on part partition by p_mfgr @@ -65,6 +129,13 @@ order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey; -- 8. testJoinRight +explain +select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey; + select abc.* from part p1 join noop(on part partition by p_mfgr @@ -72,6 +143,13 @@ order by p_name ) abc on abc.p_partkey = p1.p_partkey; -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part @@ -79,6 +157,15 @@ partition by p_mfgr order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -88,6 +175,16 @@ from noopwithmap(on part order by p_name); -- 11. testHavingWithWindowingPTFNoGBY +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -98,6 +195,16 @@ order by p_name) ; -- 12. testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -108,6 +215,19 @@ order by p_mfgr, p_name ))); -- 13. testPTFAndWindowingInSubQ +explain +select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 ; + select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -121,6 +241,19 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 ) sub1 ; -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -134,6 +267,12 @@ order by p_name ; -- 15. testDistinctInSelectWithPTF +explain +select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name); + select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr @@ -147,6 +286,14 @@ sum(p_retailprice) as s from part group by p_mfgr, p_brand; +explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row); + select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -173,6 +320,22 @@ dr INT, cud DOUBLE, fv1 INT); +explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + from noop(on part partition by p_mfgr order by p_name) @@ -193,6 +356,23 @@ select * from part_4; select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -210,6 +390,23 @@ from noop(on order by p_mfgr,p_name) ; -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -227,6 +424,21 @@ from noop(on order by p_mfgr ) ; -- 20. testMultiOperatorChainWithNoWindowing +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)); + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -243,6 +455,23 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name); + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -260,6 +489,22 @@ from noopwithmap(on order by p_mfgr,p_name); -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )); + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -276,6 +521,20 @@ from noop(on )); -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )); + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/ptf_streaming.q ql/src/test/queries/clientpositive/ptf_streaming.q index 04b674c..7795365 100644 --- ql/src/test/queries/clientpositive/ptf_streaming.q +++ ql/src/test/queries/clientpositive/ptf_streaming.q @@ -1,8 +1,16 @@ -create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver'; - -- SORT_QUERY_RESULTS --1. test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -12,7 +20,15 @@ from noopstreaming(on part order by p_name ); - -- 2. testJoinWithNoop +-- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +; + select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j @@ -21,6 +37,13 @@ sort by j.p_name) ; -- 7. testJoin +explain +select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey; + select abc.* from noopstreaming(on part partition by p_mfgr @@ -28,6 +51,13 @@ order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey; -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part @@ -35,6 +65,15 @@ partition by p_mfgr order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -44,6 +83,16 @@ from noopwithmapstreaming(on part order by p_name); -- 12. testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -54,6 +103,16 @@ order by p_mfgr, p_name ))); -- 12.1 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -64,6 +123,16 @@ order by p_mfgr, p_name ))); -- 12.2 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -74,6 +143,19 @@ order by p_mfgr, p_name ))); -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -87,6 +169,23 @@ order by p_name ; -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -104,6 +203,23 @@ from noopstreaming(on order by p_mfgr,p_name) ; -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -121,6 +237,20 @@ from noop(on order by p_mfgr ) ; -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )); + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/results/clientpositive/ptf.q.out ql/src/test/results/clientpositive/ptf.q.out index 9196b94..f678035 100644 --- ql/src/test/results/clientpositive/ptf.q.out +++ ql/src/test/results/clientpositive/ptf.q.out @@ -1,6 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -10,11 +11,10 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -24,6 +24,91 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -53,21 +138,141 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -99,19 +304,71 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -141,6 +398,7 @@ Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -150,9 +408,8 @@ from noop(on part order by p_name ) abc PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,6 +419,91 @@ from noop(on part order by p_name ) abc POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -191,6 +533,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -200,9 +543,8 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -212,6 +554,91 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -241,6 +668,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -251,9 +679,8 @@ from noop(on part ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +691,125 @@ from noop(on part ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -292,21 +838,116 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -338,21 +979,116 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -384,21 +1120,101 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -428,6 +1244,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -436,9 +1253,8 @@ from noopwithmap(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -447,6 +1263,91 @@ from noopwithmap(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -476,6 +1377,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -484,9 +1386,8 @@ from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -495,6 +1396,89 @@ from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -524,6 +1508,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -533,9 +1518,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -545,6 +1529,116 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -574,6 +1668,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -586,9 +1681,8 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -601,6 +1695,97 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 4100.06 @@ -630,6 +1815,7 @@ Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -641,9 +1827,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -655,6 +1840,136 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -686,19 +2001,103 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -746,6 +2145,122 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view +PREHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -828,6 +2343,189 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _wcol0 (type: bigint), _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-7 + Stats-Aggr Operator + PREHOOK: query: from noop(on part partition by p_mfgr order by p_name) @@ -947,6 +2645,7 @@ Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -963,9 +2662,8 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -982,6 +2680,130 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1011,6 +2833,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1027,9 +2850,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1046,6 +2868,151 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1075,6 +3042,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1089,9 +3057,8 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1106,6 +3073,124 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 @@ -1135,6 +3220,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1151,9 +3237,8 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1170,6 +3255,153 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1199,6 +3431,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1214,9 +3447,8 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1232,6 +3464,128 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2 @@ -1261,6 +3615,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1274,9 +3629,8 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1290,6 +3644,124 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/ptf_streaming.q.out ql/src/test/results/clientpositive/ptf_streaming.q.out index ef7ae88..9cf645d 100644 --- ql/src/test/results/clientpositive/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/ptf_streaming.q.out @@ -1,12 +1,7 @@ -PREHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -PREHOOK: type: CREATEFUNCTION -PREHOOK: Output: noopstreaming -POSTHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -POSTHOOK: type: CREATEFUNCTION -POSTHOOK: Output: noopstreaming PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -16,11 +11,10 @@ from noopstreaming(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -30,6 +24,91 @@ from noopstreaming(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -59,21 +138,141 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -105,21 +304,116 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -151,21 +445,101 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -195,6 +569,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -203,9 +578,8 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -214,6 +588,91 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -243,6 +702,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -252,9 +712,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +723,116 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -293,6 +862,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -302,9 +872,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -314,6 +883,116 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -343,6 +1022,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -352,9 +1032,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -364,6 +1043,116 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -393,6 +1182,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -404,9 +1194,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -418,6 +1207,136 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -449,6 +1368,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -465,9 +1385,8 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -484,6 +1403,130 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -513,6 +1556,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -529,9 +1573,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -548,6 +1591,151 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -577,6 +1765,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -590,9 +1779,8 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -606,6 +1794,124 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out index 9196b94..ad7d9d5 100644 --- ql/src/test/results/clientpositive/spark/ptf.q.out +++ ql/src/test/results/clientpositive/spark/ptf.q.out @@ -1,6 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -10,11 +11,10 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -24,6 +24,87 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -53,21 +134,129 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -99,19 +288,77 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -141,6 +388,7 @@ Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -150,9 +398,8 @@ from noop(on part order by p_name ) abc PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,6 +409,87 @@ from noop(on part order by p_name ) abc POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -191,6 +519,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -200,9 +529,8 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -212,6 +540,87 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -241,6 +650,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -251,9 +661,8 @@ from noop(on part ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +673,111 @@ from noop(on part ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -292,21 +806,114 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -338,21 +945,114 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -384,21 +1084,97 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -428,6 +1204,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -436,9 +1213,8 @@ from noopwithmap(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -447,6 +1223,87 @@ from noopwithmap(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -476,6 +1333,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -484,9 +1342,8 @@ from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -495,6 +1352,85 @@ from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -524,6 +1460,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -533,9 +1470,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -545,6 +1481,102 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -574,6 +1606,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -586,9 +1619,8 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -601,6 +1633,93 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 4100.06 @@ -630,6 +1749,7 @@ Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -641,9 +1761,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -655,6 +1774,124 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -686,19 +1923,99 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -746,6 +2063,108 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view +PREHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -828,6 +2247,172 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _wcol0 (type: bigint), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + Reducer 6 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 7 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-4 + Stats-Aggr Operator + PREHOOK: query: from noop(on part partition by p_mfgr order by p_name) @@ -947,6 +2532,7 @@ Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -963,9 +2549,8 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -982,6 +2567,116 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1011,6 +2706,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1027,9 +2723,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1046,6 +2741,127 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1075,6 +2891,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1089,9 +2906,8 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1106,6 +2922,110 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 @@ -1135,6 +3055,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1151,9 +3072,8 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1170,6 +3090,129 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1199,6 +3242,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1214,9 +3258,8 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1232,6 +3275,114 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2 @@ -1261,6 +3412,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1274,9 +3426,8 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1290,6 +3441,110 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/spark/ptf_streaming.q.out ql/src/test/results/clientpositive/spark/ptf_streaming.q.out index ef7ae88..9f98933 100644 --- ql/src/test/results/clientpositive/spark/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/spark/ptf_streaming.q.out @@ -1,12 +1,7 @@ -PREHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -PREHOOK: type: CREATEFUNCTION -PREHOOK: Output: noopstreaming -POSTHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -POSTHOOK: type: CREATEFUNCTION -POSTHOOK: Output: noopstreaming PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -16,11 +11,10 @@ from noopstreaming(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -30,6 +24,87 @@ from noopstreaming(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -59,21 +134,129 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -105,21 +288,114 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -151,21 +427,97 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -195,6 +547,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -203,9 +556,8 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -214,6 +566,87 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -243,6 +676,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -252,9 +686,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +697,102 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -293,6 +822,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -302,9 +832,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -314,6 +843,102 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -343,6 +968,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -352,9 +978,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -364,6 +989,102 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -393,6 +1114,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -404,9 +1126,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -418,6 +1139,124 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -449,6 +1288,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -465,9 +1305,8 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -484,6 +1323,116 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -513,6 +1462,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -529,9 +1479,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -548,6 +1497,127 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -577,6 +1647,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -590,9 +1661,8 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -606,6 +1676,110 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index f3b61ce..020fdff 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -244,7 +244,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -295,6 +295,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -302,18 +303,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -536,7 +533,7 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -664,7 +661,7 @@ STAGE PLANS: keys: 0 p_partkey (type: int) 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) @@ -672,7 +669,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -681,18 +678,14 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -850,7 +843,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -901,6 +894,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1107,7 +1101,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1158,6 +1152,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1165,18 +1160,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1397,7 +1388,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1448,6 +1439,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1455,18 +1447,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1697,7 +1685,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1748,6 +1736,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1963,7 +1952,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2014,6 +2003,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2088,22 +2078,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -2343,7 +2329,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2394,6 +2380,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -2437,22 +2424,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -2600,7 +2583,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2658,17 +2641,13 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - sort order: ++- - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false + tag: -1 + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -2878,7 +2857,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2936,18 +2915,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3159,7 +3134,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3210,6 +3185,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3217,18 +3193,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3450,7 +3422,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3501,6 +3473,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3516,7 +3489,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false Reducer 3 Needs Tagging: false @@ -3525,18 +3498,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -3778,7 +3747,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3829,6 +3798,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3836,18 +3806,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -4166,7 +4132,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4217,6 +4183,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -4291,22 +4258,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -4494,7 +4457,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4545,6 +4508,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5211,10 +5175,10 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -5223,8 +5187,8 @@ STAGE PLANS: Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -5239,7 +5203,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -5290,39 +5254,23 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) - auto parallelism: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col0, _col2, _col3, _col6 + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5332,36 +5280,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col5 (type: int) + auto parallelism: false Reducer 5 Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5371,17 +5336,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false @@ -5393,12 +5358,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: string) + value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false Reducer 7 Needs Tagging: false @@ -5407,20 +5372,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -5430,23 +5391,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -5456,17 +5417,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 Stage: Stage-4 Stats-Aggr Operator @@ -5757,7 +5718,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -5808,6 +5769,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5823,7 +5785,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -5832,18 +5794,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6116,7 +6074,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6167,6 +6125,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6180,7 +6139,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -6195,7 +6154,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 4 Needs Tagging: false @@ -6204,18 +6163,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -6471,7 +6426,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6522,6 +6477,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6535,7 +6491,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -6544,18 +6500,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6823,7 +6775,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6874,6 +6826,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6887,7 +6840,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -6904,7 +6857,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 4 Needs Tagging: false @@ -6913,18 +6866,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -7223,7 +7172,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7274,6 +7223,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7289,7 +7239,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -7298,18 +7248,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -7574,7 +7520,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7625,6 +7571,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7640,7 +7587,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -7649,18 +7596,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/ptf.q.out ql/src/test/results/clientpositive/tez/ptf.q.out index 9196b94..6f9dd91 100644 --- ql/src/test/results/clientpositive/tez/ptf.q.out +++ ql/src/test/results/clientpositive/tez/ptf.q.out @@ -1,6 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -10,11 +11,10 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -24,6 +24,87 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -53,21 +134,129 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -99,19 +288,77 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -141,6 +388,7 @@ Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -150,9 +398,8 @@ from noop(on part order by p_name ) abc PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,6 +409,87 @@ from noop(on part order by p_name ) abc POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -191,6 +519,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -200,9 +529,8 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -212,6 +540,87 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -241,6 +650,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -251,9 +661,8 @@ from noop(on part ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +673,111 @@ from noop(on part ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -292,21 +806,114 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -338,21 +945,114 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -384,21 +1084,97 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -428,6 +1204,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -436,9 +1213,8 @@ from noopwithmap(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -447,6 +1223,87 @@ from noopwithmap(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -476,6 +1333,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -484,9 +1342,8 @@ from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -495,6 +1352,85 @@ from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -524,6 +1460,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -533,9 +1470,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -545,6 +1481,102 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -574,6 +1606,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -586,9 +1619,8 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -601,6 +1633,93 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 4100.06 @@ -630,6 +1749,7 @@ Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -641,9 +1761,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -655,6 +1774,124 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -686,19 +1923,99 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -746,6 +2063,108 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view +PREHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -828,6 +2247,169 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _wcol0 (type: bigint), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-5 + Stats-Aggr Operator + PREHOOK: query: from noop(on part partition by p_mfgr order by p_name) @@ -947,6 +2529,7 @@ Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -963,9 +2546,8 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -982,6 +2564,116 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1011,6 +2703,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1027,9 +2720,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1046,6 +2738,127 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1075,6 +2888,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1089,9 +2903,8 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1106,6 +2919,110 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 @@ -1135,6 +3052,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1151,9 +3069,8 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1170,6 +3087,129 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1199,6 +3239,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1214,9 +3255,8 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1232,6 +3272,114 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2 @@ -1261,6 +3409,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1274,9 +3423,8 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1290,6 +3438,110 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/tez/ptf_streaming.q.out ql/src/test/results/clientpositive/tez/ptf_streaming.q.out new file mode 100644 index 0000000..a935ef6 --- /dev/null +++ ql/src/test/results/clientpositive/tez/ptf_streaming.q.out @@ -0,0 +1,1810 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: -- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 5 +Manufacturer#3 almond antique misty red olive 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 0 +Manufacturer#4 almond antique violet mint lemon 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 0 +Manufacturer#5 almond antique medium spring khaki 6 -25 +Manufacturer#5 almond antique sky peru orange 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 -23 +PREHOOK: query: -- 7. testJoin +explain +select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: -- 7. testJoin +explain +select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +POSTHOOK: query: -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 +PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 12. testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: -- 12. testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 12.1 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: -- 12.1 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 12.2 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: -- 12.2 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 2346.3 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 3519.4500000000003 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 4692.6 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 5 2 5 1753.76 6446.360000000001 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 6 1602.59 8048.950000000001 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 7 4 7 1414.42 9463.37 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 8 1632.66 11096.03 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.360000000001 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.620000000001 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.3500000000004 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 +PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 +Manufacturer#1 almond antique chartreuse lavender yellow 1 1 34 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 1 1 6 6 +Manufacturer#1 almond aquamarine burnished black steel 1 1 28 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 1 1 42 42 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 +Manufacturer#2 almond antique violet turquoise frosted 1 1 40 40 +Manufacturer#2 almond aquamarine midnight light salmon 1 1 2 2 +Manufacturer#2 almond aquamarine rose maroon antique 1 1 25 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 1 18 18 +Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 1 1 14 14 +Manufacturer#3 almond antique metallic orange dim 1 1 19 19 +Manufacturer#3 almond antique misty red olive 1 1 1 1 +Manufacturer#3 almond antique olive coral navajo 1 1 45 45 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 +Manufacturer#4 almond antique violet mint lemon 1 1 39 39 +Manufacturer#4 almond aquamarine floral ivory bisque 1 1 27 27 +Manufacturer#4 almond aquamarine yellow dodger mint 1 1 7 7 +Manufacturer#4 almond azure aquamarine papaya violet 1 1 12 12 +Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 +Manufacturer#5 almond antique medium spring khaki 1 1 6 6 +Manufacturer#5 almond antique sky peru orange 1 1 2 2 +Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 +Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 +PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44 +Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 42 114 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 +Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99 +Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31 +Manufacturer#3 almond antique metallic orange dim 3 3 19 50 +Manufacturer#3 almond antique misty red olive 4 4 1 51 +Manufacturer#3 almond antique olive coral navajo 5 5 45 96 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 +Manufacturer#4 almond antique violet mint lemon 2 2 39 49 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95 +Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 +Manufacturer#5 almond antique medium spring khaki 2 2 6 37 +Manufacturer#5 almond antique sky peru orange 3 3 2 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 +PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44 44 +Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 42 114 114 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54 54 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56 56 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99 99 +Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31 31 +Manufacturer#3 almond antique metallic orange dim 3 3 19 50 50 +Manufacturer#3 almond antique misty red olive 4 4 1 51 51 +Manufacturer#3 almond antique olive coral navajo 5 5 45 96 96 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 10 +Manufacturer#4 almond antique violet mint lemon 2 2 39 49 49 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76 76 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83 83 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95 95 +Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 31 +Manufacturer#5 almond antique medium spring khaki 2 2 6 37 37 +Manufacturer#5 almond antique sky peru orange 3 3 2 39 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 85 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 108 diff --git ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out index 928c9f0..a814849 100644 --- ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out @@ -244,7 +244,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -295,6 +295,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -302,18 +303,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -536,7 +533,7 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -664,7 +661,7 @@ STAGE PLANS: keys: 0 p_partkey (type: int) 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -673,7 +670,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -682,18 +679,14 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -851,7 +844,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -902,6 +895,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1108,7 +1102,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -1159,6 +1153,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1166,18 +1161,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1398,7 +1389,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -1449,6 +1440,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1456,18 +1448,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1698,7 +1686,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -1749,6 +1737,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1964,7 +1953,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2015,6 +2004,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2089,22 +2079,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -2345,7 +2331,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2396,6 +2382,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -2440,22 +2427,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: true Stage: Stage-0 Fetch Operator @@ -2603,7 +2586,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2661,17 +2644,13 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - sort order: ++- - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true + tag: -1 + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -2881,7 +2860,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2939,18 +2918,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3162,7 +3137,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -3213,6 +3188,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3220,18 +3196,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3453,7 +3425,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -3504,6 +3476,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3519,7 +3492,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: true Reducer 3 Needs Tagging: false @@ -3528,18 +3501,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -3781,7 +3750,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -3832,6 +3801,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3839,18 +3809,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -4169,7 +4135,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -4220,6 +4186,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -4294,22 +4261,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -4498,7 +4461,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -4549,6 +4512,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5216,10 +5180,10 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-1 Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -5227,8 +5191,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5243,7 +5207,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -5294,6 +5258,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5302,58 +5267,37 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: string) + value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true - Reducer 3 - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: _col1 (type: string) auto parallelism: true - Reducer 4 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col0, _col2, _col3, _col6 + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5363,36 +5307,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col5 (type: int) + auto parallelism: true Reducer 5 Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5402,17 +5363,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false @@ -5420,7 +5381,7 @@ STAGE PLANS: Stage: Stage-3 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -5430,23 +5391,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 Stage: Stage-4 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -5456,17 +5417,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 Stage: Stage-5 Stats-Aggr Operator @@ -5757,7 +5718,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -5808,6 +5769,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5823,7 +5785,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -5832,18 +5794,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6116,7 +6074,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -6167,6 +6125,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6180,7 +6139,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -6195,7 +6154,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 4 Needs Tagging: false @@ -6204,18 +6163,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -6471,7 +6426,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -6522,6 +6477,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6535,7 +6491,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -6544,18 +6500,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6823,7 +6775,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -6874,6 +6826,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6887,7 +6840,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -6904,7 +6857,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 4 Needs Tagging: false @@ -6913,18 +6866,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -7223,7 +7172,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -7274,6 +7223,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7289,7 +7239,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -7298,18 +7248,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -7574,7 +7520,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -7625,6 +7571,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7640,7 +7587,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -7649,18 +7596,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/vectorized_ptf.q.out ql/src/test/results/clientpositive/vectorized_ptf.q.out index 7fdd1d8..1e3c43c 100644 --- ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -239,7 +239,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -290,33 +290,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -572,7 +569,7 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false TableScan alias: p2 @@ -647,7 +644,7 @@ STAGE PLANS: keys: 0 p_partkey (type: int) 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -658,8 +655,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -678,7 +675,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -689,8 +686,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -698,8 +695,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -711,27 +708,23 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -922,7 +915,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -973,6 +966,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -1173,7 +1167,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1224,33 +1218,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -1504,7 +1495,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1555,33 +1546,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -1845,7 +1833,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1896,6 +1884,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -2198,7 +2187,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2249,37 +2238,34 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int,string,string,string,string,int,string,double,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -2545,7 +2531,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2596,37 +2582,34 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int,string,string,string,string,int,string,double,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-1 Map Reduce @@ -2903,7 +2886,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2960,27 +2943,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3222,7 +3201,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3279,27 +3258,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3544,7 +3519,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3595,33 +3570,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3876,7 +3848,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3927,6 +3899,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -3944,8 +3917,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3964,7 +3937,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3975,8 +3948,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3984,8 +3957,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3997,27 +3970,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -4292,7 +4261,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4343,33 +4312,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [sub1:part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -4721,7 +4687,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4772,37 +4738,34 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col7 - columns.types int,string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col5,_col7 + columns.types int,string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -5130,7 +5093,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -5181,6 +5144,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -5980,12 +5944,12 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -6001,7 +5965,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6052,6 +6016,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -6067,82 +6032,14 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -6152,8 +6049,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _wcol0,_col1,_col2,_col5 - columns.types bigint,string,string,int + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6161,30 +6058,30 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col5 (type: int) + value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _wcol0,_col1,_col2,_col5 - columns.types bigint,string,string,int + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6192,8 +6089,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _wcol0,_col1,_col2,_col5 - columns.types bigint,string,string,int + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6202,18 +6099,18 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) - outputColumnNames: _col0, _col2, _col3, _col6 + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -6223,22 +6120,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -6248,41 +6145,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) + key expressions: _col2 (type: string), _col5 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col5 (type: int), _col7 (type: double) + value expressions: _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 + base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -6305,18 +6202,82 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _wcol0,_col1,_col2,_col5 + columns.types bigint,string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col5 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _wcol0,_col1,_col2,_col5 + columns.types bigint,string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _wcol0,_col1,_col2,_col5 + columns.types bigint,string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col0, _col2, _col3, _col6 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -6326,22 +6287,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -6351,17 +6312,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 Stage: Stage-7 Stats-Aggr Operator @@ -6647,7 +6608,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6698,6 +6659,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -6715,8 +6677,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6735,7 +6697,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6746,8 +6708,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6755,8 +6717,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6768,27 +6730,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -7094,7 +7052,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7145,6 +7103,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -7160,8 +7119,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7180,7 +7139,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7191,8 +7150,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7200,8 +7159,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7222,8 +7181,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7242,7 +7201,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7253,8 +7212,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7262,8 +7221,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7275,27 +7234,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-4 Map Reduce @@ -7584,7 +7539,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7635,6 +7590,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -7650,8 +7606,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7670,7 +7626,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7681,8 +7637,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7690,8 +7646,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7703,27 +7659,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -8024,7 +7976,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8075,6 +8027,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -8090,8 +8043,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8110,7 +8063,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8121,8 +8074,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8130,8 +8083,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8154,8 +8107,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8174,7 +8127,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8185,8 +8138,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8194,8 +8147,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8207,27 +8160,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-4 Map Reduce @@ -8559,7 +8508,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8610,6 +8559,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -8627,8 +8577,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8647,7 +8597,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8658,8 +8608,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8667,8 +8617,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8680,27 +8630,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -8998,7 +8944,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -9049,6 +8995,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -9066,8 +9013,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -9086,7 +9033,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -9097,8 +9044,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -9106,8 +9053,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -9119,27 +9066,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce