From 73a4f3f637c274a1f69cbca0986b61c2d674b572 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Tue, 29 Sep 2015 08:14:40 -0700 Subject: [PATCH] HIVE-12004 : SDPO doesnt set colExprMap correctly on new RS --- .../ql/optimizer/SortedDynPartitionOptimizer.java | 7 +++-- .../correlation/CorrelationUtilities.java | 33 ---------------------- .../dynpart_sort_opt_vectorization.q | 2 -- .../clientpositive/dynpart_sort_optimization.q | 2 -- .../clientpositive/dynpart_sort_optimization2.q | 2 -- 5 files changed, 5 insertions(+), 41 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 7bcb797..d58c24d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -205,10 +205,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowSchema outRS = new RowSchema(fsParent.getSchema()); ArrayList valColInfo = Lists.newArrayList(fsParent.getSchema().getSignature()); ArrayList newValueCols = Lists.newArrayList(); - Map colExprMap = Maps.newHashMap(); for (ColumnInfo ci : valColInfo) { newValueCols.add(new ExprNodeColumnDesc(ci)); - colExprMap.put(ci.getInternalName(), newValueCols.get(newValueCols.size() - 1)); } ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder, newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType()); @@ -223,6 +221,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // Create ReduceSink operator ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild( rsConf, new RowSchema(outRS.getSignature()), fsParent); + List valueColNames = rsConf.getOutputValueColumnNames(); + Map colExprMap = Maps.newHashMap(); + for (int i = 0 ; i < valueColNames.size(); i++) { + colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), newValueCols.get(i)); + } rsOp.setColumnExprMap(colExprMap); List valCols = rsConf.getValueCols(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java index 7bb49be..388399c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java @@ -354,45 +354,12 @@ protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOp ch.replaceParent(childRS, sel); } - removeChildSelIfApplicable(getSingleChild(childRS), sel, context, procCtx); childRS.setChildOperators(null); childRS.setParentOperators(null); procCtx.addRemovedOperator(childRS); return sel; } - //TODO: ideally this method should be removed in future, as in we need not to rely on removing - // this select operator which likely is introduced by SortedDynPartitionOptimizer. - // NonblockingdedupOptimizer should be able to merge this select Operator with its - // parent. But, that is not working at the moment. See: dynpart_sort_optimization2.q - - private static void removeChildSelIfApplicable(Operator child, SelectOperator sel, - ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException { - - if (!(child instanceof SelectOperator)) { - return; - } - if (child.getColumnExprMap() != null) { - return; - } - - SelectOperator selOp = (SelectOperator) child; - - for (ExprNodeDesc desc : selOp.getConf().getColList()) { - if (!(desc instanceof ExprNodeColumnDesc)) { - return; - } - ExprNodeColumnDesc col = (ExprNodeColumnDesc) desc; - if(!col.getColumn().startsWith(ReduceField.VALUE.toString()+".") || - col.getTabAlias() != null || col.getIsPartitionColOrVirtualCol()){ - return; - } - } - - removeOperator(child, getSingleChild(child), sel, context); - procCtx.addRemovedOperator(child); - } - protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupByOperator cGBYr, ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException { diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q index 7e94f23..3d0cdcd 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q @@ -7,8 +7,6 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.vectorized.execution.enabled=true; set hive.enforce.bucketing=false; set hive.enforce.sorting=false; -set hive.exec.submitviachild=true; -set hive.exec.submit.local.task.via.child=true; create table over1k( t tinyint, diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index ea670e9..a1a87d8 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -6,8 +6,6 @@ set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; set hive.enforce.bucketing=false; set hive.enforce.sorting=false; -set hive.exec.submitviachild=true; -set hive.exec.submit.local.task.via.child=true; create table over1k( t tinyint, diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q index 5a504ec..c18f1cc 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q @@ -6,8 +6,6 @@ set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; set hive.enforce.bucketing=false; set hive.enforce.sorting=false; -set hive.exec.submitviachild=true; -set hive.exec.submit.local.task.via.child=true; -- SORT_QUERY_RESULTS -- 1.7.12.4 (Apple Git-37)