diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 265dd7e..d444912 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -78,7 +78,6 @@ import org.apache.hadoop.hive.ql.exec.ArchiveUtils; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FetchOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; @@ -129,7 +128,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; import org.apache.hadoop.hive.ql.optimizer.lineage.Generator; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType; import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher; @@ -183,7 +181,6 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ScriptDesc; @@ -7609,10 +7606,20 @@ private Operator genReduceSinkPlan(Operator input, Operator dummy = Operator.createDummy(); dummy.setParentOperators(Arrays.asList(input)); + ArrayList noConstantSortCols = new ArrayList(); + StringBuilder noConstantsSortOrder = new StringBuilder(); + StringBuilder noConstantsNullOrder = new StringBuilder(); ArrayList sortColsBack = new ArrayList(); - for (ExprNodeDesc sortCol : sortCols) { - sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input)); + for (int i = 0; i < sortCols.size(); i++) { + ExprNodeDesc sortCol = sortCols.get(i); + if (!(sortCol instanceof ExprNodeConstantDesc)) { + noConstantSortCols.add(sortCol); + noConstantsSortOrder.append(sortOrder.charAt(i)); + noConstantsNullOrder.append(nullOrder.charAt(i)); + sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input)); + } } + // For the generation of the values expression just get the inputs // signature and generate field expressions for those RowResolver rsRR = new RowResolver(); @@ -7620,6 +7627,7 @@ private Operator genReduceSinkPlan(Operator input, ArrayList valueCols = new ArrayList(); ArrayList valueColsBack = new ArrayList(); Map colExprMap = new HashMap(); + ArrayList constantCols = new ArrayList(); ArrayList columnInfos = inputRR.getColumnInfos(); @@ -7632,6 +7640,12 @@ private Operator genReduceSinkPlan(Operator input, // backtrack can be null when input is script operator ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input); + if (valueBack instanceof ExprNodeConstantDesc) { + // ignore, it will be generated by SEL op + index[i] = Integer.MAX_VALUE; + constantCols.add(valueBack); + continue; + } int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack); if (kindex >= 0) { index[i] = kindex; @@ -7668,8 +7682,9 @@ private Operator genReduceSinkPlan(Operator input, dummy.setParentOperators(null); - ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(sortCols, valueCols, outputColumns, - false, -1, partitionCols, sortOrder, nullOrder, numReducers, acidOp); + ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(noConstantSortCols, valueCols, outputColumns, + false, -1, partitionCols, noConstantsSortOrder.toString(), noConstantsNullOrder.toString(), + numReducers, acidOp); Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR); @@ -7688,23 +7703,29 @@ private Operator genReduceSinkPlan(Operator input, ArrayList selOutputCols = new ArrayList(); Map selColExprMap = new HashMap(); + Iterator constants = constantCols.iterator(); for (int i = 0; i < index.length; i++) { ColumnInfo prev = columnInfos.get(i); String[] nm = inputRR.reverseLookup(prev.getInternalName()); String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName()); ColumnInfo info = new ColumnInfo(prev); - String field; - if (index[i] >= 0) { - field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]); + ExprNodeDesc desc; + if (index[i] == Integer.MAX_VALUE) { + desc = constants.next(); } else { - field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1); + String field; + if (index[i] >= 0) { + field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]); + } else { + field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1); + } + desc = new ExprNodeColumnDesc(info.getType(), + field, info.getTabAlias(), info.getIsVirtualCol()); } - String internalName = getColumnInternalName(i); - ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), - field, info.getTabAlias(), info.getIsVirtualCol()); selCols.add(desc); + String internalName = getColumnInternalName(i); info.setInternalName(internalName); selectRR.put(nm[0], nm[1], info); if (nm2 != null) { diff --git ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index 86346b3..4088a39 100644 --- ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -1139,13 +1139,13 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), 3 (type: int), _col3 (type: double), _col4 (type: double), 4 (type: int), _col8 (type: double), _col9 (type: double) - sort order: ++++++++ + key expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double), _col8 (type: double), _col9 (type: double) + sort order: ++++++ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col5 (type: int), _col6 (type: int) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey4 (type: double), VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double) + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), 3 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), VALUE._col0 (type: int), VALUE._col1 (type: int), 4 (type: int), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/vector_coalesce.q.out ql/src/test/results/clientpositive/vector_coalesce.q.out index 21a9122..e21dfcf 100644 --- ql/src/test/results/clientpositive/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/vector_coalesce.q.out @@ -33,14 +33,14 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: null (type: double), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) - sort order: ++++++ + key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) + sort order: +++++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: smallint), KEY.reducesinkkey5 (type: string) + expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit @@ -117,14 +117,14 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: null (type: tinyint), _col1 (type: double), _col2 (type: int), _col3 (type: double) - sort order: ++++ + key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) + sort order: +++ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: double) + expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit @@ -199,14 +199,13 @@ STAGE PLANS: Select Operator Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: null (type: float), null (type: bigint), 0.0 (type: float) - sort order: +++ + sort order: Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: float) + expressions: null (type: float), null (type: bigint), 0.0 (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Limit @@ -365,14 +364,13 @@ STAGE PLANS: Select Operator Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: null (type: float), null (type: bigint), null (type: float) - sort order: +++ + sort order: Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float) + expressions: null (type: float), null (type: bigint), null (type: float) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Limit