diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index abf32f1..bb15b2b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -275,7 +275,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, List prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); //we create a copy of prunedCols to create a list of pruned columns for PTFOperator - prunedCols = new ArrayList(prunedCols); + prunedCols = Utilities.mergeUniqElems(getWindowFunctionColumns(def), prunedCols); + prunedColumnsList(prunedCols, def); RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver(); RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig); @@ -299,6 +300,17 @@ private static RowResolver buildPrunedRR(List prunedCols, return newRR; } + // always should be in this order (see PTFDeserializer#initializeWindowing) + private List getWindowFunctionColumns(WindowTableFunctionDef tDef) { + List columns = new ArrayList(); + if (tDef.getWindowFunctions() != null) { + for (WindowFunctionDef wDef : tDef.getWindowFunctions()) { + columns.add(wDef.getAlias()); + } + } + return columns; + } + /* * add any input columns referenced in WindowFn args or expressions. */ diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java index 469dc9f..2c52a43 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java @@ -458,4 +458,15 @@ public static RowResolver getCombinedRR(RowResolver leftRR, } return combinedRR; } + + public RowResolver duplicate() { + RowResolver resolver = new RowResolver(); + resolver.rowSchema = new RowSchema(rowSchema); + resolver.rslvMap.putAll(rslvMap); + resolver.invRslvMap.putAll(invRslvMap); + resolver.altInvRslvMap.putAll(altInvRslvMap); + resolver.expressionMap.putAll(expressionMap); + resolver.isExprResolver = isExprResolver; + return resolver; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 4364f28..7f35c26 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -5221,7 +5221,7 @@ private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operat // insert a select operator here used by the ColumnPruner to reduce // the data to shuffle - Operator select = insertSelectAllPlanForGroupBy(selectInput); + Operator select = genSelectAllDesc(selectInput); // Generate ReduceSinkOperator ReduceSinkOperator reduceSinkOperatorInfo = @@ -8465,8 +8465,7 @@ private JoinType getType(JoinCond[] conds) { return type; } - private Operator insertSelectAllPlanForGroupBy(Operator input) - throws SemanticException { + private Operator genSelectAllDesc(Operator input) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); ArrayList columns = inputRR.getColumnInfos(); @@ -8480,9 +8479,10 @@ private Operator insertSelectAllPlanForGroupBy(Operator input) columnNames.add(col.getInternalName()); columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col)); } + RowResolver outputRR = inputRR.duplicate(); Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new SelectDesc(colList, columnNames, true), new RowSchema(inputRR - .getColumnInfos()), input), inputRR); + new SelectDesc(colList, columnNames, true), + outputRR.getRowSchema(), input), outputRR); output.setColumnExprMap(columnExprMap); return output; } @@ -8931,7 +8931,7 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT } // insert a select operator here used by the ColumnPruner to reduce // the data to shuffle - curr = insertSelectAllPlanForGroupBy(curr); + curr = genSelectAllDesc(curr); // Check and transform group by *. This will only happen for select distinct *. // Here the "genSelectPlan" is being leveraged. // The main benefits are (1) remove virtual columns that should @@ -12142,6 +12142,7 @@ Operator genWindowingPlan(WindowingSpec wSpec, Operator input) throws SemanticEx input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR); + input = genSelectAllDesc(input); rr = ptfOpRR; } diff --git ql/src/test/queries/clientpositive/windowing_windowspec.q ql/src/test/queries/clientpositive/windowing_windowspec.q index 6d8ce67..63f97b7 100644 --- ql/src/test/queries/clientpositive/windowing_windowspec.q +++ ql/src/test/queries/clientpositive/windowing_windowspec.q @@ -34,3 +34,7 @@ select f, sum(f) over (partition by ts order by f range between unbounded preced select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; + +set hive.cbo.enable=false; +-- HIVE-9228 +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7; diff --git ql/src/test/results/clientpositive/windowing_windowspec.q.out ql/src/test/results/clientpositive/windowing_windowspec.q.out index 00af6b8..8d78c22 100644 --- ql/src/test/results/clientpositive/windowing_windowspec.q.out +++ ql/src/test/results/clientpositive/windowing_windowspec.q.out @@ -830,3 +830,20 @@ alice allen 65609 20.0 alice allen 65662 20.0 alice allen 65670 20.0 alice allen 65720 20.0 +PREHOOK: query: -- HIVE-9228 +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: -- HIVE-9228 +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 65545 +alice allen 65557 +alice allen 65600 +alice allen 65609 +alice allen 65662 +alice allen 65670 +alice allen 65720