diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 917b3a4..5447783 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -139,6 +139,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ orc_vectorization_ppd.q,\ parallel.q,\ ptf.q,\ + ptf_streaming.q,\ sample1.q,\ script_env_var1.q,\ script_env_var2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index afd1738..0df63e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; +import org.apache.hadoop.hive.ql.udf.ptf.Noop; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -255,26 +256,28 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, PTFDesc conf = op.getConf(); //Since we cannot know what columns will be needed by a PTF chain, //we do not prune columns on PTFOperator for PTF chains. - if (!conf.forWindowing()) { + if (!conf.forWindowing() && !Noop.class.isInstance(conf.getFuncDef().getTFunction())) { return super.process(nd, stack, cppCtx, nodeOutputs); } - - WindowTableFunctionDef def = (WindowTableFunctionDef) conf.getFuncDef(); - ArrayList sig = new ArrayList(); - List prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); - //we create a copy of prunedCols to create a list of pruned columns for PTFOperator - prunedCols = new ArrayList(prunedCols); - prunedColumnsList(prunedCols, def); RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); + + WindowTableFunctionDef def = null; + if (conf.forWindowing()) { + def = (WindowTableFunctionDef) conf.getFuncDef(); + prunedCols = prunedColumnsList(prunedCols, def); + } + ArrayList sig = new ArrayList(); RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig); - cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def)); cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newRR); op.getSchema().setSignature(sig); + + prunedCols = def == null ? prunedCols : prunedInputList(prunedCols, def); + cppCtx.getPrunedColLists().put(op, prunedCols); return null; } - private static RowResolver buildPrunedRR(List prunedCols, + private RowResolver buildPrunedRR(List prunedCols, RowResolver oldRR, ArrayList sig) throws SemanticException{ RowResolver newRR = new RowResolver(); HashSet prunedColsSet = new HashSet(prunedCols); @@ -291,7 +294,10 @@ private static RowResolver buildPrunedRR(List prunedCols, /* * add any input columns referenced in WindowFn args or expressions. 
*/ - private void prunedColumnsList(List prunedCols, WindowTableFunctionDef tDef) { + private ArrayList prunedColumnsList(List prunedCols, + WindowTableFunctionDef tDef) { + //we create a copy of prunedCols to create a list of pruned columns for PTFOperator + ArrayList mergedColList = new ArrayList(prunedCols); if ( tDef.getWindowFunctions() != null ) { for(WindowFunctionDef wDef : tDef.getWindowFunctions() ) { if ( wDef.getArgs() == null) { @@ -299,22 +305,23 @@ private void prunedColumnsList(List prunedCols, WindowTableFunctionDef t } for(PTFExpressionDef arg : wDef.getArgs()) { ExprNodeDesc exprNode = arg.getExprNode(); - Utilities.mergeUniqElems(prunedCols, exprNode.getCols()); + Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } } if(tDef.getPartition() != null){ for(PTFExpressionDef col : tDef.getPartition().getExpressions()){ ExprNodeDesc exprNode = col.getExprNode(); - Utilities.mergeUniqElems(prunedCols, exprNode.getCols()); + Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } if(tDef.getOrder() != null){ for(PTFExpressionDef col : tDef.getOrder().getExpressions()){ ExprNodeDesc exprNode = col.getExprNode(); - Utilities.mergeUniqElems(prunedCols, exprNode.getCols()); + Utilities.mergeUniqElems(mergedColList, exprNode.getCols()); } } + return mergedColList; } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java index ee7328e..7f26f0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java @@ -134,7 +134,9 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { topNodes.addAll(pGraphContext.getTopOps().values()); ogw.startWalking(topNodes, null); - LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); + if (LOG.isDebugEnabled()) { + LOG.debug("After PPD:\n" + Operator.toString(pctx.getTopOps().values())); + } return pGraphContext; } diff --git ql/src/test/queries/clientpositive/ptf.q ql/src/test/queries/clientpositive/ptf.q index 56eef0a..394c227 100644 --- ql/src/test/queries/clientpositive/ptf.q +++ ql/src/test/queries/clientpositive/ptf.q @@ -1,6 +1,16 @@ -- SORT_QUERY_RESULTS --1. test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -11,6 +21,14 @@ from noop(on part ); -- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +; + select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j @@ -19,12 +37,28 @@ sort by j.p_name) ; -- 3. 
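The ColumnPrunerProcFactory hunk above does two things: the pruning guard now lets a non-windowing PTF chain fall through to the pruning path when its table function is the pass-through Noop (previously no non-windowing PTF chain was ever pruned), and prunedColumnsList now returns a merged copy of the pruned-column list instead of mutating its argument. Below is a minimal, standalone Java sketch of only that copy-and-merge pattern; PrunedColsExample, mergeUniq, and the sample column names are illustrative stand-ins, not Hive APIs.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Illustrative stand-in for the copy-and-merge pattern used by prunedColumnsList:
// the caller's list is left untouched; referenced columns are merged into a copy.
public class PrunedColsExample {

  // Roughly what Utilities.mergeUniqElems does: add each element only if not already present.
  static void mergeUniq(List<String> dest, List<String> src) {
    for (String s : src) {
      if (!dest.contains(s)) {
        dest.add(s);
      }
    }
  }

  // Returns a new list = the pruned columns plus any columns referenced by the
  // window function arguments / partition / order expressions (modeled here as strings).
  static List<String> mergedPrunedCols(List<String> prunedCols, List<String> referencedCols) {
    List<String> merged = new ArrayList<String>(prunedCols); // copy, do not mutate the caller's list
    mergeUniq(merged, referencedCols);
    return merged;
  }

  public static void main(String[] args) {
    List<String> pruned = Arrays.asList("p_mfgr", "p_name");
    List<String> referenced = Arrays.asList("p_name", "p_retailprice");
    System.out.println(mergedPrunedCols(pruned, referenced)); // [p_mfgr, p_name, p_retailprice]
  }
}

Returning the merged copy keeps the child operator's pruned-column list untouched, which matches the intent of the in-place copy that the original code made before merging.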
testOnlyPTF +explain +select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name); + select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name); -- 4. testPTFAlias +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -35,6 +69,17 @@ from noop(on part ) abc; -- 5. testPTFAndWhereWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -46,6 +91,18 @@ from noop(on part ; -- 6. testSWQAndPTFAndGBy +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -58,6 +115,13 @@ group by p_mfgr, p_name, p_size ; -- 7. testJoin +explain +select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey; + select abc.* from noop(on part partition by p_mfgr @@ -65,6 +129,13 @@ order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey; -- 8. testJoinRight +explain +select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey; + select abc.* from part p1 join noop(on part partition by p_mfgr @@ -72,6 +143,13 @@ order by p_name ) abc on abc.p_partkey = p1.p_partkey; -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part @@ -79,6 +157,15 @@ partition by p_mfgr order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -88,6 +175,16 @@ from noopwithmap(on part order by p_name); -- 11. 
testHavingWithWindowingPTFNoGBY +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +; + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -98,6 +195,16 @@ order by p_name) ; -- 12. testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -108,6 +215,19 @@ order by p_mfgr, p_name ))); -- 13. testPTFAndWindowingInSubQ +explain +select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 ; + select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -121,6 +241,19 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 ) sub1 ; -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -134,6 +267,12 @@ order by p_name ; -- 15. 
testDistinctInSelectWithPTF +explain +select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name); + select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr @@ -147,6 +286,14 @@ sum(p_retailprice) as s from part group by p_mfgr, p_brand; +explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row); + select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -173,6 +320,22 @@ dr INT, cud DOUBLE, fv1 INT); +explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + from noop(on part partition by p_mfgr order by p_name) @@ -193,6 +356,23 @@ select * from part_4; select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -210,6 +390,23 @@ from noop(on order by p_mfgr,p_name) ; -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -227,6 +424,21 @@ from noop(on order by p_mfgr ) ; -- 20. 
testMultiOperatorChainWithNoWindowing +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)); + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -243,6 +455,23 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name); + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -260,6 +489,22 @@ from noopwithmap(on order by p_mfgr,p_name); -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )); + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -276,6 +521,20 @@ from noop(on )); -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )); + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/ptf_streaming.q ql/src/test/queries/clientpositive/ptf_streaming.q index 04b674c..7795365 100644 --- ql/src/test/queries/clientpositive/ptf_streaming.q +++ ql/src/test/queries/clientpositive/ptf_streaming.q @@ -1,8 +1,16 @@ -create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver'; - -- SORT_QUERY_RESULTS --1. 
test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -12,7 +20,15 @@ from noopstreaming(on part order by p_name ); - -- 2. testJoinWithNoop +-- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +; + select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j @@ -21,6 +37,13 @@ sort by j.p_name) ; -- 7. testJoin +explain +select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey; + select abc.* from noopstreaming(on part partition by p_mfgr @@ -28,6 +51,13 @@ order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey; -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part @@ -35,6 +65,15 @@ partition by p_mfgr order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -44,6 +83,16 @@ from noopwithmapstreaming(on part order by p_name); -- 12. 
testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -54,6 +103,16 @@ order by p_mfgr, p_name ))); -- 12.1 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -64,6 +123,16 @@ order by p_mfgr, p_name ))); -- 12.2 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))); + select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -74,6 +143,19 @@ order by p_mfgr, p_name ))); -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -87,6 +169,23 @@ order by p_name ; -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -104,6 +203,23 @@ from noopstreaming(on order by p_mfgr,p_name) ; -- 19. 
testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) ; + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -121,6 +237,20 @@ from noop(on order by p_mfgr ) ; -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )); + select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/results/clientpositive/ptf.q.out ql/src/test/results/clientpositive/ptf.q.out index 9196b94..e47689b 100644 --- ql/src/test/results/clientpositive/ptf.q.out +++ ql/src/test/results/clientpositive/ptf.q.out @@ -1,6 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -10,11 +11,10 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. 
test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -24,6 +24,89 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: 
default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -53,21 +136,139 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -99,19 +300,71 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 3. 
testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -141,6 +394,7 @@ Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -150,9 +404,8 @@ from noop(on part order by p_name ) abc PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 4. 
testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,6 +415,89 @@ from noop(on part order by p_name ) abc POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY 
POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -191,6 +527,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -200,9 +537,8 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -212,6 +548,89 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as 
r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -241,6 +660,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -251,9 +671,8 @@ from noop(on part ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +683,123 @@ from noop(on part ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num 
rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -292,21 +828,116 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. 
testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 
+ Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -338,21 +969,116 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -384,21 +1110,100 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. 
testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -428,6 +1233,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: 
query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -436,9 +1242,8 @@ from noopwithmap(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -447,6 +1252,89 @@ from noopwithmap(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 
+from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -476,6 +1364,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -484,9 +1373,8 @@ from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -495,6 +1383,87 @@ from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -524,6 +1493,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -533,9 +1503,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. 
testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -545,6 +1514,114 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -574,6 +1651,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -586,9 +1664,8 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 13. 
testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -601,6 +1678,95 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@part 
#### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 4100.06 @@ -630,6 +1796,7 @@ Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -641,9 +1808,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -655,6 +1821,134 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -686,19 +1980,103 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 
15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: 
QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -746,6 +2124,120 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view +PREHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -828,6 +2320,183 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: part + 
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-7 + Stats-Aggr Operator + PREHOOK: query: from noop(on part partition by p_mfgr order by p_name) @@ -947,6 +2616,7 @@ Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -963,9 +2633,8 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. 
testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -982,6 +2651,128 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1011,6 +2802,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1027,9 +2819,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1046,6 +2837,149 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1075,6 +3009,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1089,9 +3024,8 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 20. 
testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1106,6 +3040,122 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 @@ -1135,6 +3185,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1151,9 +3202,8 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1170,6 +3220,151 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here 
#### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1199,6 +3394,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1214,9 +3410,8 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1232,6 +3427,126 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2 @@ -1261,6 +3576,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1274,9 +3590,8 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1290,6 +3605,122 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/ptf_streaming.q.out ql/src/test/results/clientpositive/ptf_streaming.q.out index ef7ae88..b13e7b6 100644 --- ql/src/test/results/clientpositive/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/ptf_streaming.q.out @@ -1,12 +1,7 @@ -PREHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -PREHOOK: type: CREATEFUNCTION -PREHOOK: Output: noopstreaming -POSTHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -POSTHOOK: type: CREATEFUNCTION -POSTHOOK: Output: noopstreaming PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -16,11 +11,10 @@ from noopstreaming(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. 
test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -30,6 +24,89 @@ from noopstreaming(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY 
POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -59,21 +136,139 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -105,21 +300,116 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. 
testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -151,21 +441,100 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -195,6 +564,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -203,9 +573,8 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -214,6 +583,89 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -243,6 +695,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -252,9 +705,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. 
testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +716,114 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -293,6 +853,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -302,9 +863,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -314,6 +874,114 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce 
Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -343,6 +1011,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr 
order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -352,9 +1021,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -364,6 +1032,114 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -393,6 +1169,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -404,9 +1181,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. 
testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -418,6 +1194,134 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 
1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -449,6 +1353,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -465,9 +1370,8 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. 
testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -484,6 +1388,128 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -513,6 +1539,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -529,9 +1556,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -548,6 +1574,149 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -577,6 +1746,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -590,9 +1760,8 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -606,6 +1775,122 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 
Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out index 9196b94..9e809b8 100644 --- ql/src/test/results/clientpositive/spark/ptf.q.out +++ ql/src/test/results/clientpositive/spark/ptf.q.out @@ -1,6 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -10,11 +11,10 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. 
test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -24,6 +24,85 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -53,21 +132,127 @@ Manufacturer#5 almond antique sky peru orange 2 
3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 
1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -99,19 +284,77 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 3. 
testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -141,6 +384,7 @@ Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -150,9 +394,8 @@ from noop(on part order by p_name ) abc PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 4. 
testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,6 +405,85 @@ from noop(on part order by p_name ) abc POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -191,6 +513,7 @@ Manufacturer#5 almond 
antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -200,9 +523,8 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -212,6 +534,85 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, 
p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -241,6 +642,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -251,9 +653,8 @@ from noop(on part ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +665,109 @@ from noop(on part ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 
1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -292,21 +796,114 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. 
testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noop(on part 
+partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -338,21 +935,114 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -384,21 +1074,96 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. 
testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -428,6 +1193,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. 
testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -436,9 +1202,8 @@ from noopwithmap(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -447,6 +1212,85 @@ from noopwithmap(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, 
p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -476,6 +1320,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -484,9 +1329,8 @@ from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -495,6 +1339,83 @@ from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -524,6 +1445,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -533,9 +1455,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -545,6 +1466,100 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + 
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -574,6 +1589,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -586,9 +1602,8 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 13. 
testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -601,6 +1616,91 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 4100.06 @@ -630,6 +1730,7 @@ Manufacturer#5 almond antique sky peru orange 3 7672.66 
Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -641,9 +1742,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -655,6 +1755,122 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -686,19 +1902,99 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 15. 
testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -746,6 +2042,106 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view +PREHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 
as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -828,6 +2224,166 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 
(type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + Reducer 6 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 7 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.part_4 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-4 + Stats-Aggr Operator + PREHOOK: query: from noop(on part partition by p_mfgr order by p_name) @@ -947,6 +2503,7 @@ Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -963,9 +2520,8 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -982,6 +2538,114 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + 
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1011,6 +2675,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1027,9 +2692,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1046,6 +2710,125 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1075,6 +2858,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1089,9 +2873,8 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 20. 
testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1106,6 +2889,108 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + 
partition by p_mfgr + order by p_mfgr)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 @@ -1135,6 +3020,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1151,9 +3037,8 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1170,6 +3055,127 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 
(type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1199,6 +3205,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1214,9 +3221,8 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1232,6 +3238,112 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2 @@ -1261,6 +3373,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1274,9 +3387,8 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1290,6 +3402,108 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order 
by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/spark/ptf_streaming.q.out ql/src/test/results/clientpositive/spark/ptf_streaming.q.out index ef7ae88..5b6768b 100644 --- ql/src/test/results/clientpositive/spark/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/spark/ptf_streaming.q.out @@ -1,12 +1,7 @@ -PREHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -PREHOOK: type: CREATEFUNCTION -PREHOOK: Output: noopstreaming -POSTHOOK: query: create temporary function noopstreaming as 'org.apache.hadoop.hive.ql.udf.ptf.NoopStreaming$NoopStreamingResolver' -POSTHOOK: type: CREATEFUNCTION -POSTHOOK: Output: noopstreaming PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -16,11 +11,10 @@ from noopstreaming(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. 
test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -30,6 +24,85 @@ from noopstreaming(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -59,21 +132,127 @@ Manufacturer#5 
almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: 
+ Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -105,21 +284,114 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. 
testJoin +explain select abc.* from noopstreaming(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from 
noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -151,21 +423,96 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmapstreaming(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -195,6 +542,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -203,9 +551,8 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -214,6 +561,85 @@ from noopwithmapstreaming(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 
(type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -243,6 +669,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -252,9 +679,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. 
testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +690,100 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr 
order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -293,6 +813,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -302,9 +823,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.1 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -314,6 +834,100 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: 
string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -343,6 +957,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -352,9 +967,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12.2 testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -364,6 +978,100 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 
26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: 
type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -393,6 +1101,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -404,9 +1113,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -418,6 +1126,122 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -449,6 +1273,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 18. 
testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -465,9 +1290,8 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -484,6 +1308,114 @@ from noopstreaming(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -513,6 +1445,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -529,9 +1462,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -548,6 +1480,125 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -577,6 +1628,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -590,9 +1642,8 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -606,6 +1657,108 @@ from noopwithmapstreaming(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by 
p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 4f95e3f..bf79f78 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -244,7 +244,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -295,6 +295,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -302,18 +303,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -534,7 
+531,7 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -662,7 +659,7 @@ STAGE PLANS: keys: 0 p_partkey (type: int) 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) @@ -670,7 +667,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -679,18 +676,14 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -846,7 +839,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -897,6 +890,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1103,7 +1097,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 
Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1154,6 +1148,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1161,18 +1156,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1391,7 +1382,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1442,6 +1433,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1449,18 +1441,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition 
columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1689,7 +1677,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1740,6 +1728,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1953,7 +1942,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2004,6 +1993,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2078,22 +2068,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE 
Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -2333,7 +2319,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2384,6 +2370,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -2427,22 +2414,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -2590,7 +2573,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name 
(type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2648,18 +2631,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - sort order: ++- - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -2867,7 +2846,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2925,18 +2904,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3146,7 +3121,7 @@ STAGE 
PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3197,6 +3172,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3204,18 +3180,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3435,7 +3407,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3486,6 +3458,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3501,7 +3474,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false 
Reducer 3 Needs Tagging: false @@ -3510,18 +3483,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -3761,7 +3730,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3812,6 +3781,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3819,18 +3789,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -4147,7 +4113,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name 
(type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4198,6 +4164,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -4272,22 +4239,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -4473,7 +4436,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4524,6 +4487,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5188,10 +5152,10 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -5200,8 +5164,8 @@ STAGE PLANS: Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) Reducer 3 <- 
Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -5216,7 +5180,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -5267,6 +5231,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -5274,28 +5239,13 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5305,20 +5255,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -5327,12 +5292,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5342,17 +5307,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false @@ -5364,12 +5329,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false Reducer 7 Needs Tagging: false @@ -5378,20 +5343,16 @@ STAGE 
PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -5401,23 +5362,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -5427,17 +5388,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 Stage: Stage-4 Stats-Aggr Operator @@ -5728,7 +5689,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE 
(type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -5779,6 +5740,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5794,7 +5756,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -5803,18 +5765,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6085,7 +6043,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6136,6 +6094,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6149,7 +6108,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), 
_col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -6164,7 +6123,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 4 Needs Tagging: false @@ -6173,18 +6132,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -6438,7 +6393,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6489,6 +6444,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6502,7 +6458,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -6511,18 +6467,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE 
PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6788,7 +6740,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6839,6 +6791,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6852,7 +6805,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -6869,7 +6822,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 4 Needs Tagging: false @@ -6878,18 +6831,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 
(type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -7186,7 +7135,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7237,6 +7186,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7252,7 +7202,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -7261,18 +7211,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator 
Tree: @@ -7535,7 +7481,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7586,6 +7532,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7601,7 +7548,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Reducer 3 Needs Tagging: false @@ -7610,18 +7557,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/ptf.q.out ql/src/test/results/clientpositive/tez/ptf.q.out index 9196b94..1fefea0 100644 --- ql/src/test/results/clientpositive/tez/ptf.q.out +++ ql/src/test/results/clientpositive/tez/ptf.q.out @@ -1,6 +1,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS --1. test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -10,11 +11,10 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- SORT_QUERY_RESULTS --1. 
test1 +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -24,6 +24,85 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -53,21 +132,127 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 2. testJoinWithNoop +explain select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF 
Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 0 @@ -99,19 +284,77 @@ Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 PREHOOK: query: -- 3. testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 3. 
testOnlyPTF +explain select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -141,6 +384,7 @@ Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- 4. testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -150,9 +394,8 @@ from noop(on part order by p_name ) abc PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 4. 
testPTFAlias +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,6 +405,85 @@ from noop(on part order by p_name ) abc POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -191,6 +513,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 
5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -200,9 +523,8 @@ from noop(on part order by p_name ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -212,6 +534,85 @@ from noop(on part order by p_name ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), (_col5 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by 
p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -241,6 +642,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -251,9 +653,8 @@ from noop(on part ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -264,6 +665,109 @@ from noop(on part ) group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: string), _col1 (type: string), _col2 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _wcol0 (type: int), _wcol1 (type: int), _col2 (type: int), (_col2 - _wcol2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 2 0 @@ -292,21 +796,114 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 PREHOOK: query: -- 7. testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 7. 
testJoin +explain select abc.* from noop(on part partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name 
+) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -338,21 +935,114 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 8. testJoinRight +explain select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 _col0 (type: int) + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from part p1 join noop(on part +partition by p_mfgr +order by p_name +) abc on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ @@ -384,21 +1074,96 @@ POSTHOOK: Input: default@part 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: -- 9. testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 9. 
testNoopWithMap +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr order by p_name, p_size desc) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 @@ -428,6 +1193,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 PREHOOK: query: -- 10. 
testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -436,9 +1202,8 @@ from noopwithmap(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -447,6 +1212,85 @@ from noopwithmap(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over 
(partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -476,6 +1320,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -484,9 +1329,8 @@ from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -495,6 +1339,83 @@ from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -524,6 +1445,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -533,9 +1455,8 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 12. testFunctionChain +explain select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -545,6 +1466,100 @@ partition by p_mfgr order by p_mfgr, p_name ))) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + 
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 @@ -574,6 +1589,7 @@ Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -586,9 +1602,8 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 13. 
testPTFAndWindowingInSubQ +explain select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -601,6 +1616,91 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: bigint), _wcol1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 4100.06 @@ -630,6 +1730,7 @@ Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond 
aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -641,9 +1742,8 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -655,6 +1755,122 @@ partition by p_mfgr order by p_name ) abc join part p1 on abc.p_partkey = p1.p_partkey POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), 
_col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 @@ -686,19 +1902,99 @@ Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 PREHOOK: query: -- 15. testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 15. 
testDistinctInSelectWithPTF +explain select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col2, _col1, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 2 @@ -746,6 +2042,106 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@part POSTHOOK: Output: database:default POSTHOOK: Output: default@mfgr_price_view +PREHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( 
partition by p_mfgr order by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_brand, s, +sum(s) over w1 as s1 +from noop(on mfgr_price_view +partition by p_mfgr +order by p_mfgr) +window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -828,6 +2224,163 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition 
columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_4 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_5 + + Stage: Stage-5 + Stats-Aggr Operator + PREHOOK: query: from noop(on part partition by p_mfgr order by p_name) @@ -947,6 +2500,7 @@ Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -963,9 +2517,8 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -982,6 +2535,114 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 
(type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1011,6 +2672,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1027,9 +2689,8 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 19. 
testMulti3OperatorsFunctionChain +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1046,6 +2707,125 @@ from noop(on partition by p_mfgr order by p_mfgr ) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1075,6 +2855,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1089,9 +2870,8 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 20. 
testMultiOperatorChainWithNoWindowing +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1106,6 +2886,108 @@ from noop(on partition by p_mfgr order by p_mfgr)) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) 
+PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 @@ -1135,6 +3017,7 @@ Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1151,9 +3034,8 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -1170,6 +3052,127 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 @@ -1199,6 +3202,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1214,9 +3218,8 @@ from noop(on order by p_mfgr )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -1232,6 +3235,112 @@ from noop(on order by p_mfgr )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by 
p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr,p_name order by p_mfgr,p_name rows between unbounded preceding and current row) as s2 +from noop(on + noopwithmap(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + partition by p_mfgr + order by p_mfgr + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 2 @@ -1261,6 +3370,7 @@ Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1274,9 +3384,8 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 +explain select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1290,6 +3399,108 @@ from noopwithmap(on order by p_mfgr, p_name) )) POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, 
+sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmap(on + noop(on + noop(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 diff --git ql/src/test/results/clientpositive/tez/ptf_streaming.q.out ql/src/test/results/clientpositive/tez/ptf_streaming.q.out new file mode 100644 index 0000000..359e966 --- /dev/null +++ ql/src/test/results/clientpositive/tez/ptf_streaming.q.out @@ -0,0 +1,1789 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +--1. test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +--1. 
test1 +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### 
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 2. testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: -- 2. 
testJoinWithNoop +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - _wcol0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noopstreaming (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique burnished rose metallic 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 5 +Manufacturer#3 almond antique misty red olive 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 0 +Manufacturer#4 almond antique violet mint lemon 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 0 +Manufacturer#5 almond antique medium spring khaki 6 -25 +Manufacturer#5 almond antique sky peru orange 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 -23 +PREHOOK: query: -- 7. testJoin +explain +select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: -- 7. 
testJoin +explain +select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.* +from noopstreaming(on part +partition by 
p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.* +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. 
eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: -- 9. testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +POSTHOOK: query: -- 9. 
testNoopWithMap +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmapstreaming(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 
+Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 +PREHOOK: query: -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num 
rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmapstreaming(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 12. 
testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: -- 12. testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 12.1 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from 
noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: -- 12.1 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 
+ Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on noopwithmap(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 12.2 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: -- 12.2 testFunctionChain +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, 
+sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and 
current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmapstreaming(on noopstreaming(on part +partition by p_mfgr +order by p_mfgr, p_name +))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: -- 14. 
testPTFJoinWithWindowingWithCount +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 14 Data size: 
1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: bigint), _col7 (type: double), _wcol3 (type: double), _col5 (type: int), (_col5 - _wcol4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noopstreaming(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 2346.3 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 3519.4500000000003 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 4 1173.15 4692.6 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 5 2 5 1753.76 6446.360000000001 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 6 1602.59 8048.950000000001 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 7 4 7 1414.42 9463.37 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 8 1632.66 11096.03 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.360000000001 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond 
antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.620000000001 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.3500000000004 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 +PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: 
string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noopstreaming(on + noopwithmap(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 +Manufacturer#1 almond antique chartreuse lavender yellow 1 1 34 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 1 1 6 6 +Manufacturer#1 almond aquamarine burnished black steel 1 1 28 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 1 1 42 42 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 +Manufacturer#2 almond antique violet turquoise frosted 1 1 40 40 +Manufacturer#2 almond aquamarine midnight light salmon 1 1 2 2 +Manufacturer#2 almond aquamarine rose maroon antique 1 1 25 25 +Manufacturer#2 
almond aquamarine sandy cyan gainsboro 1 1 18 18 +Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 1 1 14 14 +Manufacturer#3 almond antique metallic orange dim 1 1 19 19 +Manufacturer#3 almond antique misty red olive 1 1 1 1 +Manufacturer#3 almond antique olive coral navajo 1 1 45 45 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 +Manufacturer#4 almond antique violet mint lemon 1 1 39 39 +Manufacturer#4 almond aquamarine floral ivory bisque 1 1 27 27 +Manufacturer#4 almond aquamarine yellow dodger mint 1 1 7 7 +Manufacturer#4 almond azure aquamarine papaya violet 1 1 12 12 +Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 +Manufacturer#5 almond antique medium spring khaki 1 1 6 6 +Manufacturer#5 almond antique sky peru orange 1 1 2 2 +Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 +Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 +PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_mfgr (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator 
Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 5 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44 +Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72 +Manufacturer#1 almond aquamarine pink moccasin 
thistle 6 5 42 114 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 +Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99 +Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31 +Manufacturer#3 almond antique metallic orange dim 3 3 19 50 +Manufacturer#3 almond antique misty red olive 4 4 1 51 +Manufacturer#3 almond antique olive coral navajo 5 5 45 96 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 +Manufacturer#4 almond antique violet mint lemon 2 2 39 49 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95 +Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 +Manufacturer#5 almond antique medium spring khaki 2 2 6 37 +Manufacturer#5 almond antique sky peru orange 3 3 2 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 +PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +POSTHOOK: query: -- 23. 
testMultiOperatorChainWithDiffPartitionForWindow2 +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_mfgr (type: string), p_name (type: string) + sort order: ++++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + Reducer 4 + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _wcol0 (type: int), _wcol1 (type: int), _col5 (type: int), _wcol2 (type: bigint), _wcol2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (partition by p_mfgr order by p_name range between unbounded preceding and current row) as s2 +from noopwithmapstreaming(on + noop(on + noopstreaming(on part + partition by p_mfgr, p_name + order by p_mfgr, p_name) + )) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 4 4 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 34 38 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 6 44 44 +Manufacturer#1 almond aquamarine burnished black steel 5 4 28 72 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 42 114 114 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 2 2 40 54 54 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2 56 56 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 25 81 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 18 99 99 +Manufacturer#3 almond antique chartreuse khaki white 1 1 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 14 31 31 +Manufacturer#3 almond antique metallic orange dim 3 3 19 50 50 +Manufacturer#3 almond antique misty red olive 4 4 1 51 51 +Manufacturer#3 almond antique olive coral navajo 5 5 45 96 96 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 10 10 10 +Manufacturer#4 almond antique violet mint lemon 2 2 39 49 49 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 27 76 76 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 7 83 83 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 12 95 95 +Manufacturer#5 almond antique blue firebrick mint 1 1 31 31 31 +Manufacturer#5 almond antique medium spring khaki 2 2 6 37 37 +Manufacturer#5 almond antique sky peru orange 3 3 2 39 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 85 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 108 diff --git ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out index 2363b02..5410d2b 100644 --- ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out @@ -244,7 +244,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - 
value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -295,6 +295,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -302,18 +303,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -534,7 +531,7 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -662,7 +659,7 @@ STAGE PLANS: keys: 0 p_partkey (type: int) 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -671,7 +668,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -680,18 +677,14 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - 
expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -847,7 +840,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -898,6 +891,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1104,7 +1098,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -1155,6 +1149,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1162,18 +1157,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: 
string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1392,7 +1383,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -1443,6 +1434,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1450,18 +1442,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1690,7 +1678,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -1741,6 +1729,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -1954,7 +1943,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: 
string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2005,6 +1994,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2079,22 +2069,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -2335,7 +2321,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2386,6 +2372,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -2430,22 +2417,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator 
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: true Stage: Stage-0 Fetch Operator @@ -2593,7 +2576,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2651,18 +2634,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + sort order: ++- + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - sort order: ++- - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -2870,7 +2849,7 @@ STAGE PLANS: Map-reduce partition columns: 
p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -2928,18 +2907,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3149,7 +3124,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -3200,6 +3175,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3207,18 +3183,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 
16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -3438,7 +3410,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -3489,6 +3461,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3504,7 +3477,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: true Reducer 3 Needs Tagging: false @@ -3513,18 +3486,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -3764,7 +3733,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), 
INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -3815,6 +3784,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -3822,18 +3792,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -4150,7 +4116,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -4201,6 +4167,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -4275,22 +4242,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: 
int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -4477,7 +4440,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -4528,6 +4491,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5193,10 +5157,10 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-1 Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -5204,8 +5168,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5220,7 +5184,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -5271,6 +5235,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5279,41 +5244,22 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) + key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + value expressions: _col1 (type: string), _col2 (type: string), _col5 
(type: int), _col7 (type: double) auto parallelism: true - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true - Reducer 3 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true - Reducer 4 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract @@ -5321,12 +5267,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5336,20 +5282,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 
16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -5358,12 +5319,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5373,17 +5334,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false @@ -5391,7 +5352,7 @@ STAGE PLANS: Stage: Stage-3 Dependency Collection - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -5401,23 +5362,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 Stage: Stage-4 Stats-Aggr Operator #### A masked 
pattern was here #### - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -5427,17 +5388,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 Stage: Stage-5 Stats-Aggr Operator @@ -5728,7 +5689,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -5779,6 +5740,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -5794,7 +5756,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -5803,18 +5765,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: 
int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6085,7 +6043,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -6136,6 +6094,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6149,7 +6108,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -6164,7 +6123,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 4 Needs Tagging: false @@ -6173,18 +6132,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -6438,7 +6393,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 
Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -6489,6 +6444,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6502,7 +6458,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -6511,18 +6467,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -6788,7 +6740,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -6839,6 +6791,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -6852,7 +6805,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: 
int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -6869,7 +6822,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 4 Needs Tagging: false @@ -6878,18 +6831,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -7186,7 +7135,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -7237,6 +7186,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7252,7 +7202,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: 
bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -7261,18 +7211,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) + sort order: ++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col2 (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -7535,7 +7481,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -7586,6 +7532,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -7601,7 +7548,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: true Reducer 3 Needs Tagging: false @@ -7610,18 +7557,14 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: true + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: true Reducer 4 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/vectorized_ptf.q.out ql/src/test/results/clientpositive/vectorized_ptf.q.out index 3d60d7f..de55007 100644 --- ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -239,7 +239,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -290,33 +290,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -570,7 +567,7 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type 
(type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false TableScan alias: p2 @@ -645,7 +642,7 @@ STAGE PLANS: keys: 0 p_partkey (type: int) 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -656,8 +653,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -676,7 +673,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -687,8 +684,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -696,8 +693,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -709,27 +706,23 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types 
string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -918,7 +911,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -969,6 +962,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -1169,7 +1163,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1220,33 +1214,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - 
MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -1498,7 +1489,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1549,33 +1540,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -1837,7 +1825,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: 
false Path -> Alias: #### A masked pattern was here #### @@ -1888,6 +1876,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -2188,7 +2177,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2239,37 +2228,34 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int,string,string,string,string,int,string,double,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -2535,7 +2521,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) 
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2586,37 +2572,34 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int,string,string,string,string,int,string,double,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-1 Map Reduce @@ -2893,7 +2876,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), 
BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -2950,27 +2933,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3211,7 +3190,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3268,27 +3247,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - 
GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3531,7 +3506,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3582,33 +3557,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3861,7 +3833,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), 
BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3912,6 +3884,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -3929,8 +3902,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3949,7 +3922,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3960,8 +3933,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3969,8 +3942,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3982,27 +3955,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -4275,7 +4244,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4326,33 +4295,30 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [sub1:part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -4702,7 +4668,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data 
size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -4753,37 +4719,34 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col7 - columns.types int,string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col5,_col7 + columns.types int,string,string,int,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -5109,7 +5072,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -5160,6 +5123,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc 
[part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -5957,12 +5921,12 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -5978,7 +5942,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6029,6 +5993,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -6044,80 +6009,14 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5,_col7 - columns.types string,string,int,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - 
Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -6127,8 +6026,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _wcol0,_col1,_col2,_col5 - columns.types bigint,string,string,int + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6136,30 +6035,30 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _wcol0,_col1,_col2,_col5 - columns.types bigint,string,string,int + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6167,8 +6066,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _wcol0,_col1,_col2,_col5 - columns.types 
bigint,string,string,int + columns _col1,_col2,_col5,_col7 + columns.types string,string,int,double escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6181,12 +6080,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -6196,22 +6095,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -6221,41 +6120,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns p_mfgr,p_name,p_size,r,dr,s columns.comments - columns.types string:string:int:int:int:int:double:int + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) + key expressions: _col2 (type: string), _col5 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: 
string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 + base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -6281,13 +6180,75 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _wcol0,_col1,_col2,_col5 + columns.types bigint,string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _wcol0,_col1,_col2,_col5 + columns.types bigint,string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _wcol0,_col1,_col2,_col5 + columns.types bigint,string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 
(type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -6297,22 +6258,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -6322,17 +6283,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 columns.comments - columns.types string:string:int:int:int:double + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 Stage: Stage-7 Stats-Aggr Operator @@ -6618,7 +6579,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6669,6 +6630,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -6686,8 +6648,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types 
int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6706,7 +6668,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -6717,8 +6679,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6726,8 +6688,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -6739,27 +6701,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -7063,7 +7021,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7114,6 +7072,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -7129,8 +7088,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7149,7 +7108,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7160,8 +7119,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7169,8 +7128,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7191,8 +7150,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7211,7 +7170,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7222,8 +7181,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7231,8 +7190,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7244,27 +7203,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-4 Map Reduce @@ -7551,7 +7506,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7602,6 +7557,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -7617,8 +7573,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7637,7 +7593,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -7648,8 +7604,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7657,8 +7613,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -7670,27 +7626,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column 
stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col2,_col5 - columns.types string,string,int - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col2,_col5 + columns.types string,string,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce @@ -7989,7 +7941,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8040,6 +7992,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Execution mode: vectorized Needs Tagging: false Reduce Operator Tree: Extract @@ -8055,8 +8008,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11 - columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct + columns _col1,_col2,_col5 + columns.types string,string,int escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -8075,7 +8028,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct) + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -8086,8 
+8039,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8095,8 +8048,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8119,8 +8072,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8139,7 +8092,7 @@ STAGE PLANS:
 Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 tag: -1
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct)
+ value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
 auto parallelism: false
 Path -> Alias:
 #### A masked pattern was here ####
@@ -8150,8 +8103,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8159,8 +8112,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8172,27 +8125,23 @@ STAGE PLANS:
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 PTF Operator
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- Select Operator
-   expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
-   outputColumnNames: _col1, _col2, _col5
-   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-   File Output Operator
-     compressed: false
-     GlobalTableId: 0
+ File Output Operator
+   compressed: false
+   GlobalTableId: 0
 #### A masked pattern was here ####
-     NumFilesPerFileSink: 1
-     table:
-       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-       properties:
-         columns _col1,_col2,_col5
-         columns.types string,string,int
-         escape.delim \
-         serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-       serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-     TotalFiles: 1
-     GatherStats: false
-     MultiFileSpray: false
+   NumFilesPerFileSink: 1
+   table:
+     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+     properties:
+       columns _col1,_col2,_col5
+       columns.types string,string,int
+       escape.delim \
+       serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+   TotalFiles: 1
+   GatherStats: false
+   MultiFileSpray: false
 Stage: Stage-4
 Map Reduce
@@ -8522,7 +8471,7 @@ STAGE PLANS:
 Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 tag: -1
- value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct)
+ value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
 auto parallelism: false
 Path -> Alias:
 #### A masked pattern was here ####
@@ -8573,6 +8522,7 @@ STAGE PLANS:
 name: default.part_orc
 Truncated Path -> Alias:
 /part_orc [part_orc]
+ Execution mode: vectorized
 Needs Tagging: false
 Reduce Operator Tree:
 Extract
@@ -8590,8 +8540,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8610,7 +8560,7 @@ STAGE PLANS:
 Map-reduce partition columns: _col2 (type: string)
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 tag: -1
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct)
+ value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
 auto parallelism: false
 Path -> Alias:
 #### A masked pattern was here ####
@@ -8621,8 +8571,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8630,8 +8580,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -8643,27 +8593,23 @@ STAGE PLANS:
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 PTF Operator
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- Select Operator
-   expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
-   outputColumnNames: _col1, _col2, _col5
-   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-   File Output Operator
-     compressed: false
-     GlobalTableId: 0
+ File Output Operator
+   compressed: false
+   GlobalTableId: 0
 #### A masked pattern was here ####
-     NumFilesPerFileSink: 1
-     table:
-       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-       properties:
-         columns _col1,_col2,_col5
-         columns.types string,string,int
-         escape.delim \
-         serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-       serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-     TotalFiles: 1
-     GatherStats: false
-     MultiFileSpray: false
+   NumFilesPerFileSink: 1
+   table:
+     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+     properties:
+       columns _col1,_col2,_col5
+       columns.types string,string,int
+       escape.delim \
+       serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+   TotalFiles: 1
+   GatherStats: false
+   MultiFileSpray: false
 Stage: Stage-3
 Map Reduce
@@ -8959,7 +8905,7 @@ STAGE PLANS:
 Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 tag: -1
- value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct)
+ value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
 auto parallelism: false
 Path -> Alias:
 #### A masked pattern was here ####
@@ -9010,6 +8956,7 @@ STAGE PLANS:
 name: default.part_orc
 Truncated Path -> Alias:
 /part_orc [part_orc]
+ Execution mode: vectorized
 Needs Tagging: false
 Reduce Operator Tree:
 Extract
@@ -9027,8 +8974,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -9047,7 +8994,7 @@ STAGE PLANS:
 Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 tag: -1
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: string), _col11 (type: struct)
+ value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
 auto parallelism: false
 Path -> Alias:
 #### A masked pattern was here ####
@@ -9058,8 +9005,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -9067,8 +9014,8 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11
- columns.types int,string,string,string,string,int,string,double,string,bigint,string,struct
+ columns _col1,_col2,_col5
+ columns.types string,string,int
 escape.delim \
 serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -9080,27 +9027,23 @@ STAGE PLANS:
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
 PTF Operator
 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- Select Operator
-   expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
-   outputColumnNames: _col1, _col2, _col5
-   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-   File Output Operator
-     compressed: false
-     GlobalTableId: 0
+ File Output Operator
+   compressed: false
+   GlobalTableId: 0
 #### A masked pattern was here ####
-     NumFilesPerFileSink: 1
-     table:
-       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-       properties:
-         columns _col1,_col2,_col5
-         columns.types string,string,int
-         escape.delim \
-         serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-       serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-     TotalFiles: 1
-     GatherStats: false
-     MultiFileSpray: false
+   NumFilesPerFileSink: 1
+   table:
+     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+     properties:
+       columns _col1,_col2,_col5
+       columns.types string,string,int
+       escape.delim \
+       serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+   TotalFiles: 1
+   GatherStats: false
+   MultiFileSpray: false
 Stage: Stage-3
 Map Reduce