From a0e96ad507ea963ff184e169ca0fdf3621790cf1 Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Tue, 9 Feb 2016 18:48:07 -0800
Subject: [PATCH] HIVE-13033 : SPDO unnecessarily duplicates columns in key & value of mapper output

---
 .../hadoop/hive/ql/exec/ReduceSinkOperator.java    |   9 +-
 .../ql/optimizer/SortedDynPartitionOptimizer.java  | 135 +++++++++++----------
 .../dynpart_sort_opt_vectorization.q.out           |  54 ++++-----
 .../clientpositive/dynpart_sort_optimization.q.out |  78 ++++++------
 4 files changed, 135 insertions(+), 141 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
index e692460..eb4e77a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
@@ -122,7 +122,6 @@
   protected transient Object[] cachedValues;
   protected transient List<List<Integer>> distinctColIndices;
   protected transient Random random;
-  protected transient int bucketNumber = -1;
 
   /**
    * This two dimensional array holds key data and a corresponding Union object
@@ -351,6 +350,7 @@ public void process(Object row, int tag) throws HiveException {
       populateCachedDistributionKeys(row, 0);
 
       // replace bucketing columns with hashcode % numBuckets
+      int bucketNumber = -1;
       if (bucketEval != null) {
         bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
         cachedKeys[0][buckColIdxInKey] = new Text(String.valueOf(bucketNumber));
@@ -552,13 +552,6 @@ protected void collect(BytesWritable keyWritable, Writable valueWritable) throws
   private BytesWritable makeValueWritable(Object row) throws Exception {
     int length = valueEval.length;
 
-    // in case of bucketed table, insert the bucket number as the last column in value
-    if (bucketEval != null) {
-      length -= 1;
-      assert bucketNumber >= 0;
-      cachedValues[length] = new Text(String.valueOf(bucketNumber));
-    }
-
     // Evaluate the value
     for (int i = 0; i < length; i++) {
       cachedValues[i] = valueEval[i].evaluate(row);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 27b0457..3105c07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -69,6 +69,7 @@
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 
 /**
  * When dynamic partitioning (with or without bucketing and sorting) is enabled, this optimization
@@ -207,46 +208,39 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       fsOp.getConf().setNumFiles(1);
       fsOp.getConf().setTotalFiles(1);
 
-      // Create ReduceSinkDesc
-      RowSchema outRS = new RowSchema(fsParent.getSchema());
-      ArrayList<ColumnInfo> valColInfo = Lists.newArrayList(fsParent.getSchema().getSignature());
-      ArrayList<ExprNodeDesc> newValueCols = Lists.newArrayList();
-      for (ColumnInfo ci : valColInfo) {
-        newValueCols.add(new ExprNodeColumnDesc(ci));
+      ArrayList<ColumnInfo> parentCols = Lists.newArrayList(fsParent.getSchema().getSignature());
+      ArrayList<ExprNodeDesc> allRSCols = Lists.newArrayList();
+      for (ColumnInfo ci : parentCols) {
+        allRSCols.add(new ExprNodeColumnDesc(ci));
       }
-      ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder,
-          newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType());
-
-      if (!bucketColumns.isEmpty()) {
-        String tableAlias = outRS.getSignature().get(0).getTabAlias();
-        ColumnInfo ci = new ColumnInfo(BUCKET_NUMBER_COL_NAME, TypeInfoFactory.stringTypeInfo,
-            tableAlias, true, true);
-        outRS.getSignature().add(ci);
-      }
-      // Create ReduceSink operator
-      ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
-          rsConf, new RowSchema(outRS.getSignature()), fsParent);
-      List<String> valueColNames = rsConf.getOutputValueColumnNames();
-      Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap();
-      for (int i = 0 ; i < valueColNames.size(); i++) {
-        colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), newValueCols.get(i));
-      }
-      rsOp.setColumnExprMap(colExprMap);
+      ReduceSinkOperator rsOp = getReduceSinkOp(partitionPositions, sortPositions, sortOrder,
+          allRSCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType());
 
-      List<ExprNodeDesc> valCols = rsConf.getValueCols();
-      List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(valCols.size());
+      List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(allRSCols.size());
       List<String> colNames = new ArrayList<String>();
       String colName;
-      for (ExprNodeDesc valCol : valCols) {
-        colName = PlanUtils.stripQuotes(valCol.getExprString());
+      for (int i = 0; i < allRSCols.size(); i++) {
+        ExprNodeDesc col = allRSCols.get(i);
+        colName = col.getExprString();
         colNames.add(colName);
-        descs.add(new ExprNodeColumnDesc(valCol.getTypeInfo(), ReduceField.VALUE.toString()+"."+colName, null, false));
+        if (partitionPositions.contains(i) || sortPositions.contains(i)) {
+          descs.add(new ExprNodeColumnDesc(col.getTypeInfo(), ReduceField.KEY.toString()+"."+colName, null, false));
+        } else {
+          descs.add(new ExprNodeColumnDesc(col.getTypeInfo(), ReduceField.VALUE.toString()+"."+colName, null, false));
+        }
+      }
+      RowSchema selRS = new RowSchema(fsParent.getSchema());
+      if (!bucketColumns.isEmpty()) {
+        descs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, ReduceField.KEY.toString()+".'"+BUCKET_NUMBER_COL_NAME+"'", null, false));
+        colNames.add("'"+BUCKET_NUMBER_COL_NAME+"'");
+        ColumnInfo ci = new ColumnInfo(BUCKET_NUMBER_COL_NAME, TypeInfoFactory.stringTypeInfo, selRS.getSignature().get(0).getTabAlias(), true, true);
+        selRS.getSignature().add(ci);
+        fsParent.getSchema().getSignature().add(ci);
       }
-      // Create SelectDesc
       SelectDesc selConf = new SelectDesc(descs, colNames);
-      RowSchema selRS = new RowSchema(outRS);
+
       // Create Select Operator
       SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(
@@ -264,9 +258,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       }
 
       // update partition column info in FS descriptor
-      ArrayList<ExprNodeDesc> partitionColumns = getPositionsToExprNodes(partitionPositions, rsOp
-          .getSchema().getSignature());
-      fsOp.getConf().setPartitionCols(partitionColumns);
+      fsOp.getConf().setPartitionCols( rsOp.getConf().getPartitionCols());
 
       LOG.info("Inserted " + rsOp.getOperatorId() + " and " + selOp.getOperatorId()
           + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
@@ -383,8 +375,8 @@ private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) {
       return posns;
     }
 
-    public ReduceSinkDesc getReduceSinkDesc(List<Integer> partitionPositions,
-        List<Integer> sortPositions, List<Integer> sortOrder, ArrayList<ExprNodeDesc> newValueCols,
+    public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions,
+        List<Integer> sortPositions, List<Integer> sortOrder, ArrayList<ExprNodeDesc> allCols,
         ArrayList<ExprNodeDesc> bucketColumns, int numBuckets, Operator<? extends OperatorDesc> parent,
         AcidUtils.Operation writeType) {
 
@@ -392,8 +384,8 @@ public ReduceSinkDesc getReduceSinkDesc(List<Integer> partitionPositions,
       // 1) Partition columns
      // 2) Bucket number column
      // 3) Sort columns
-      List<Integer> keyColsPosInVal = Lists.newArrayList();
-      ArrayList<ExprNodeDesc> newKeyCols = Lists.newArrayList();
+      Set<Integer> keyColsPosInVal = Sets.newLinkedHashSet();
+      ArrayList<ExprNodeDesc> keyCols = Lists.newArrayList();
       List<Integer> newSortOrder = Lists.newArrayList();
 
       int numPartAndBuck = partitionPositions.size();
@@ -425,24 +417,29 @@ public ReduceSinkDesc getReduceSinkDesc(List<Integer> partitionPositions,
         }
       }
 
-      ArrayList<ExprNodeDesc> newPartCols = Lists.newArrayList();
-
+      Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap();
+      ArrayList<ExprNodeDesc> partCols = Lists.newArrayList();
       // we will clone here as RS will update bucket column key with its
       // corresponding with bucket number and hence their OIs
       for (Integer idx : keyColsPosInVal) {
         if (idx < 0) {
-          // add bucket number column to both key and value
-          ExprNodeConstantDesc encd = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo,
-              BUCKET_NUMBER_COL_NAME);
-          newKeyCols.add(encd);
-          newValueCols.add(encd);
+          ExprNodeConstantDesc bucketNumCol = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, BUCKET_NUMBER_COL_NAME);
+          keyCols.add(bucketNumCol);
+          colExprMap.put(Utilities.ReduceField.KEY + ".'" +BUCKET_NUMBER_COL_NAME+"'", bucketNumCol);
         } else {
-          newKeyCols.add(newValueCols.get(idx).clone());
+          keyCols.add(allCols.get(idx).clone());
+        }
+      }
+
+      ArrayList<ExprNodeDesc> valCols = Lists.newArrayList();
+      for (int i = 0; i < allCols.size(); i++) {
+        if (!keyColsPosInVal.contains(i)) {
+          valCols.add(allCols.get(i).clone());
        }
      }
 
      for (Integer idx : partitionPositions) {
-        newPartCols.add(newValueCols.get(idx).clone());
+        partCols.add(allCols.get(idx).clone());
      }
 
      // in the absence of SORTED BY clause, the sorted dynamic partition insert
@@ -452,41 +449,45 @@ public ReduceSinkDesc getReduceSinkDesc(List<Integer> partitionPositions,
      if (parentRSOp != null && parseCtx.getQueryProperties().hasOuterOrderBy()) {
        String parentRSOpOrder = parentRSOp.getConf().getOrder();
        if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) {
-          newKeyCols.addAll(parentRSOp.getConf().getKeyCols());
+          keyCols.addAll(parentRSOp.getConf().getKeyCols());
          orderStr += parentRSOpOrder;
        }
      }
 
+      ArrayList<String> keyColNames = Lists.newArrayList();
+      for (ExprNodeDesc keyCol : keyCols) {
+        String keyColName = keyCol.getExprString();
+        keyColNames.add(keyColName);
+        colExprMap.put(Utilities.ReduceField.KEY + "." +keyColName, keyCol);
+      }
+      ArrayList<String> valColNames = Lists.newArrayList();
+      for (ExprNodeDesc valCol : valCols) {
+        String colName =valCol.getExprString();
+        valColNames.add(colName);
+        colExprMap.put(Utilities.ReduceField.VALUE + "." +colName, valCol);
+      }
+
      // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
      // the reduce operator will initialize Extract operator with information
      // from Key and Value TableDesc
-      List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(newKeyCols,
-          "reducesinkkey");
+      List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols,
+          keyColNames, 0, "");
      TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr);
-      ArrayList<String> outputKeyCols = Lists.newArrayList();
-      for (int i = 0; i < newKeyCols.size(); i++) {
-        outputKeyCols.add("reducesinkkey" + i);
-      }
-
-      List<String> outCols = Utilities.getInternalColumnNamesFromSignature(parent.getSchema()
-          .getSignature());
-      ArrayList<String> outValColNames = Lists.newArrayList(outCols);
-      if (!bucketColumns.isEmpty()) {
-        outValColNames.add(BUCKET_NUMBER_COL_NAME);
-      }
-      List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(newValueCols,
-          outValColNames, 0, "");
+      List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols,
+          valColNames, 0, "");
      TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
      List<List<Integer>> distinctColumnIndices = Lists.newArrayList();
 
      // Number of reducers is set to default (-1)
-      ReduceSinkDesc rsConf = new ReduceSinkDesc(newKeyCols, newKeyCols.size(), newValueCols,
-          outputKeyCols, distinctColumnIndices, outValColNames, -1, newPartCols, -1, keyTable,
+      ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols,
+          keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable,
          valueTable, writeType);
      rsConf.setBucketCols(bucketColumns);
      rsConf.setNumBuckets(numBuckets);
-
-      return rsConf;
+      ReduceSinkOperator op = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
+          rsConf, new RowSchema(parent.getSchema()), parent);
+      op.setColumnExprMap(colExprMap);
+      return op;
    }
 
    /**
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
index 8813802..be2b61e 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
@@ -204,10 +204,10 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -292,10 +292,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -351,12 +351,12 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -410,12 +410,12 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -562,10 +562,10 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -650,10 +650,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -709,12 +709,12 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -768,12 +768,12 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1381,10 +1381,10 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -1471,10 +1471,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2043,12 +2043,12 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
      Execution mode: vectorized
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
index 2105d8b..857d609 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
@@ -138,10 +138,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -225,10 +225,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -284,11 +284,11 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -342,11 +342,11 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -470,10 +470,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -557,10 +557,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -616,11 +616,11 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -674,11 +674,11 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1284,10 +1284,10 @@ STAGE PLANS:
                 sort order: ++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -1373,10 +1373,10 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -1942,11 +1942,11 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col4 (type: tinyint)
                 Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
+                value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint), VALUE._bucket_number (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
          Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -2302,10 +2302,10 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int)
                 Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int)
+                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: string), VALUE._col4 (type: tinyint), VALUE._col5 (type: int)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2362,10 +2362,10 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col3 (type: string), 27 (type: tinyint), _col5 (type: int)
                 Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), 27 (type: tinyint), _col5 (type: int)
+                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: string), VALUE._col4 (type: tinyint), VALUE._col5 (type: int)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2422,10 +2422,10 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), 100 (type: int)
                 Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), 100 (type: int)
+                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: string), VALUE._col4 (type: tinyint), VALUE._col5 (type: int)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2482,10 +2482,10 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: _col3 (type: string), 27 (type: tinyint), 100 (type: int)
                 Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), 27 (type: tinyint), 100 (type: int)
+                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: string), VALUE._col4 (type: tinyint), VALUE._col5 (type: int)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2542,10 +2542,10 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: 'foo' (type: string), _col4 (type: tinyint), 100 (type: int)
                 Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), _col4 (type: tinyint), 100 (type: int)
+                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: string), VALUE._col4 (type: tinyint), VALUE._col5 (type: int)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2602,10 +2602,10 @@ STAGE PLANS:
                 sort order: +++
                 Map-reduce partition columns: 'foo' (type: string), 27 (type: tinyint), _col5 (type: int)
                 Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), 27 (type: tinyint), _col5 (type: int)
+                value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: string), VALUE._col4 (type: tinyint), VALUE._col5 (type: int)
+          expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
-- 
1.7.12.4 (Apple Git-37)
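
Note (illustrative sketch, not part of the patch): the change above makes the sorted dynamic-partition ReduceSink route each parent column either into the key (partition columns, sort columns, and the synthetic '_bucket_number' column) or into the value (everything else), instead of duplicating it in both. The following minimal, self-contained Java sketch mirrors that split using made-up column names and positions rather than Hive's real ExprNodeDesc/RowSchema plumbing:

import java.util.*;

public class KeyValueSplitSketch {
  public static void main(String[] args) {
    // Hypothetical parent row schema and positions; in the real optimizer these come
    // from fsParent.getSchema() and the table's partition/sort/bucket specification.
    List<String> parentCols = Arrays.asList("_col0", "_col1", "_col2", "_col3", "_col4");
    Set<Integer> partitionPositions = new LinkedHashSet<>(Arrays.asList(4));
    Set<Integer> sortPositions = new LinkedHashSet<>(Arrays.asList(3));
    boolean bucketed = true;

    List<String> keyCols = new ArrayList<>();
    List<String> valueCols = new ArrayList<>();
    for (int i = 0; i < parentCols.size(); i++) {
      // Partition and sort columns go only to the reduce-sink KEY; the rest go to VALUE.
      if (partitionPositions.contains(i) || sortPositions.contains(i)) {
        keyCols.add("KEY." + parentCols.get(i));
      } else {
        valueCols.add("VALUE." + parentCols.get(i));
      }
    }
    if (bucketed) {
      // The bucket number is a synthetic column that now lives only in the key.
      keyCols.add("KEY.'_bucket_number'");
    }

    System.out.println("key   = " + keyCols);   // [KEY._col3, KEY._col4, KEY.'_bucket_number']
    System.out.println("value = " + valueCols); // [VALUE._col0, VALUE._col1, VALUE._col2]
  }
}

This matches the updated expected plans above, where the reducer-side Select reads KEY._col3, KEY._col4, and KEY.'_bucket_number' while the mapper value carries only _col0 through _col2.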