diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 97e4059..1167f16 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -3250,10 +3250,44 @@ private boolean isNullConst(ExprNodeDesc exprNodeDesc) {
 
   private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr,
       VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
 
-    if (mode != VectorExpressionDescriptor.Mode.PROJECTION) {
+    boolean isFilter = false;    // Assume.
+    if (mode == VectorExpressionDescriptor.Mode.FILTER) {
+
+      // Is output type a BOOLEAN?
+      if (returnType.getCategory() == Category.PRIMITIVE &&
+          ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
+        isFilter = true;
+      } else {
+        return null;
+      }
+    }
+
+    // Get a PROJECTION IF expression.
+    VectorExpression ve = doGetIfExpression(genericUDFIf, childExpr, returnType);
+
+    if (ve == null) {
       return null;
     }
 
+    if (isFilter) {
+
+      // Wrap the PROJECTION IF expression output with a filter.
+      SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(ve.getOutputColumnNum());
+
+      filterVectorExpr.setChildExpressions(new VectorExpression[] {ve});
+
+      filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
+      filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());
+
+      return filterVectorExpr;
+    } else {
+      return ve;
+    }
+  }
+
+  private VectorExpression doGetIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr,
+      TypeInfo returnType) throws HiveException {
+
     // Add HiveConf variable with 3 modes:
     // 1) adaptor: Always use VectorUDFAdaptor for IF statements.
     //
@@ -3300,8 +3334,10 @@ private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc
     Class<?> udfClass = genericUDFIf.getClass();
     return getVectorExpressionForUdf(
-        genericUDFIf, udfClass, childExpr, mode, returnType);
+        genericUDFIf, udfClass, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
   }
 
   private VectorExpression getWhenExpression(List<ExprNodeDesc> childExpr,
       VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
 
-    if (mode != VectorExpressionDescriptor.Mode.PROJECTION) {
-      return null;
-    }
     final int size = childExpr.size();
     final ExprNodeDesc whenDesc = childExpr.get(0);
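Note on the core change above: FILTER-mode IF expressions are no longer rejected outright. When the IF returns a BOOLEAN, the PROJECTION expression is built as usual and then wrapped in SelectColumnIsTrue, which turns the projected boolean column into a row filter. Below is a minimal sketch of the filtering semantics that wrapper relies on; it is illustrative only, simplified from Hive's actual SelectColumnIsTrue class (which also handles isRepeating batches and other fast paths):

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SelectColumnIsTrueSketch {

  // Keep only the rows whose boolean column (stored as a LongColumnVector of
  // 0/1 values) is non-null and true, by rewriting batch.selected and batch.size.
  static void filterOnBooleanColumn(VectorizedRowBatch batch, int boolColumnNum) {
    LongColumnVector boolVector = (LongColumnVector) batch.cols[boolColumnNum];
    int newSize = 0;
    if (batch.selectedInUse) {
      for (int j = 0; j < batch.size; j++) {
        final int i = batch.selected[j];
        if ((boolVector.noNulls || !boolVector.isNull[i]) && boolVector.vector[i] == 1) {
          batch.selected[newSize++] = i;
        }
      }
    } else {
      for (int i = 0; i < batch.size; i++) {
        if ((boolVector.noNulls || !boolVector.isNull[i]) && boolVector.vector[i] == 1) {
          batch.selected[newSize++] = i;
        }
      }
      batch.selectedInUse = true;
    }
    batch.size = newSize;
  }
}
```

A NULL IF result therefore behaves like false in filter context, which matches what a row-mode filter does with a NULL predicate and is exactly what the test changes below have to account for.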
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java
index 666d26c..e7173d7 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
 import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
@@ -50,6 +51,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.LongWritable;
 
 import junit.framework.Assert;
@@ -158,16 +160,20 @@ public void testDecimalSmall() throws Exception {
 
   @Test
   public void testDecimal64() throws Exception {
-    Random random = new Random(12882);
+    Random random = new Random(238);
 
     doIfTestsWithDiffColumnScalar(
-        random, "decimal(10,4)", ColumnScalarMode.COLUMN_COLUMN, DataTypePhysicalVariation.DECIMAL_64, false, false);
+        random, "decimal(10,4)", ColumnScalarMode.COLUMN_COLUMN, IfVariation.PROJECTION_IF,
+        DataTypePhysicalVariation.DECIMAL_64, false, false);
     doIfTestsWithDiffColumnScalar(
-        random, "decimal(10,4)", ColumnScalarMode.COLUMN_SCALAR, DataTypePhysicalVariation.DECIMAL_64, false, false);
+        random, "decimal(10,4)", ColumnScalarMode.COLUMN_SCALAR, IfVariation.PROJECTION_IF,
+        DataTypePhysicalVariation.DECIMAL_64, false, false);
     doIfTestsWithDiffColumnScalar(
-        random, "decimal(10,4)", ColumnScalarMode.SCALAR_COLUMN, DataTypePhysicalVariation.DECIMAL_64, false, false);
+        random, "decimal(10,4)", ColumnScalarMode.SCALAR_COLUMN, IfVariation.PROJECTION_IF,
+        DataTypePhysicalVariation.DECIMAL_64, false, false);
     doIfTestsWithDiffColumnScalar(
-        random, "decimal(10,4)", ColumnScalarMode.SCALAR_SCALAR, DataTypePhysicalVariation.DECIMAL_64, false, false);
+        random, "decimal(10,4)", ColumnScalarMode.SCALAR_SCALAR, IfVariation.PROJECTION_IF,
+        DataTypePhysicalVariation.DECIMAL_64, false, false);
   }
 
   public enum IfStmtTestMode {
@@ -187,30 +193,54 @@ public void testDecimal64() throws Exception {
     static final int count = values().length;
   }
 
+  public enum IfVariation {
+    FILTER_IF,
+    PROJECTION_IF;
+
+    static final int count = values().length;
+
+    final boolean isFilter;
+    IfVariation() {
+      isFilter = name().startsWith("FILTER");
+    }
+  }
+
   private void doIfTests(Random random, String typeName)
       throws Exception {
-    doIfTests(random, typeName, DataTypePhysicalVariation.NONE);
+
+    if (typeName.equals("boolean")) {
+      doIfTests(random, typeName, IfVariation.FILTER_IF, DataTypePhysicalVariation.NONE);
+    }
+    doIfTests(random, typeName, IfVariation.PROJECTION_IF, DataTypePhysicalVariation.NONE);
   }
 
-  private void doIfTests(Random random, String typeName,
+  private void doIfTests(Random random, String typeName, IfVariation ifVariation,
       DataTypePhysicalVariation dataTypePhysicalVariation)
           throws Exception {
     doIfTestsWithDiffColumnScalar(
-        random, typeName, ColumnScalarMode.COLUMN_COLUMN, dataTypePhysicalVariation, false, false);
+        random, typeName, ColumnScalarMode.COLUMN_COLUMN, ifVariation,
+        dataTypePhysicalVariation, false, false);
     doIfTestsWithDiffColumnScalar(
-        random, typeName, ColumnScalarMode.COLUMN_SCALAR, dataTypePhysicalVariation, false, false);
+        random, typeName, ColumnScalarMode.COLUMN_SCALAR, ifVariation,
+        dataTypePhysicalVariation, false, false);
     doIfTestsWithDiffColumnScalar(
-        random, typeName, ColumnScalarMode.COLUMN_SCALAR, dataTypePhysicalVariation, false, true);
+        random, typeName, ColumnScalarMode.COLUMN_SCALAR, ifVariation,
+        dataTypePhysicalVariation, false, true);
     doIfTestsWithDiffColumnScalar(
-        random, typeName, ColumnScalarMode.SCALAR_COLUMN, dataTypePhysicalVariation, false, false);
+        random, typeName, ColumnScalarMode.SCALAR_COLUMN, ifVariation,
+        dataTypePhysicalVariation, false, false);
     doIfTestsWithDiffColumnScalar(
-        random, typeName, ColumnScalarMode.SCALAR_COLUMN, dataTypePhysicalVariation, true, false);
+        random, typeName, ColumnScalarMode.SCALAR_COLUMN, ifVariation,
+        dataTypePhysicalVariation, true, false);
     doIfTestsWithDiffColumnScalar(
-        random, typeName, ColumnScalarMode.SCALAR_SCALAR, dataTypePhysicalVariation, false, false);
+        random, typeName, ColumnScalarMode.SCALAR_SCALAR, ifVariation,
+        dataTypePhysicalVariation, false, false);
   }
 
   private void doIfTestsWithDiffColumnScalar(Random random, String typeName,
-      ColumnScalarMode columnScalarMode, DataTypePhysicalVariation dataTypePhysicalVariation,
+      ColumnScalarMode columnScalarMode, IfVariation ifVariation,
+      DataTypePhysicalVariation dataTypePhysicalVariation,
       boolean isNullScalar1, boolean isNullScalar2)
           throws Exception {
@@ -322,6 +352,7 @@ private void doIfTestsWithDiffColumnScalar(Random random, String typeName,
       case VECTOR_EXPRESSION:
         doVectorIfTest(
             typeInfo,
+            ifVariation,
            columns,
            columnNames,
            rowSource.typeInfos(),
@@ -343,7 +374,21 @@ private void doIfTestsWithDiffColumnScalar(Random random, String typeName,
       for (int v = 1; v < IfStmtTestMode.count; v++) {
         Object vectorResult = resultObjectsArray[v][i];
-        if (expectedResult == null || vectorResult == null) {
+        if (ifVariation.isFilter &&
+            expectedResult == null &&
+            vectorResult != null) {
+          // This is OK.
+          boolean vectorBoolean = ((BooleanWritable) vectorResult).get();
+          if (vectorBoolean) {
+            Assert.fail(
+                "Row " + i +
+                " typeName " + typeInfo.getTypeName() +
+                " " + ifVariation +
+                " result is NOT NULL and true" +
+                " does not match row-mode expected result is NULL which means false here" +
+                " row values " + Arrays.toString(randomRows[i]));
+          }
+        } else if (expectedResult == null || vectorResult == null) {
           if (expectedResult != null || vectorResult != null) {
             Assert.fail(
                 "Row " + i + " " + IfStmtTestMode.values()[v] +
@@ -389,10 +434,7 @@ private void doRowIfTest(TypeInfo typeInfo, List<String> columns, List<Object[]
   private void doVectorIfTest(TypeInfo typeInfo,
+      IfVariation ifVariation,
       List<String> columns, String[] columnNames,
       TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
@@ -417,6 +460,8 @@ private void doVectorIfTest(TypeInfo typeInfo,
       Object[] resultObjects)
           throws Exception {
 
+    final boolean isFilter = ifVariation.isFilter;
+
     GenericUDF udf;
     switch (ifStmtTestMode) {
     case VECTOR_EXPRESSION:
@@ -443,13 +488,31 @@ private void doVectorIfTest(TypeInfo typeInfo,
             Arrays.asList(typeInfos),
             Arrays.asList(dataTypePhysicalVariations),
             hiveConf);
-    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
+    VectorExpression vectorExpression =
+        vectorizationContext.getVectorExpression(
+            exprDesc,
+            (isFilter ?
+                VectorExpressionDescriptor.Mode.FILTER :
+                VectorExpressionDescriptor.Mode.PROJECTION));
+
+    final TypeInfo outputTypeInfo;
+    final ObjectInspector objectInspector;
+    if (!isFilter) {
+      outputTypeInfo = vectorExpression.getOutputTypeInfo();
+      objectInspector =
+          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+              outputTypeInfo);
+    } else {
+      outputTypeInfo = null;
+      objectInspector = null;
+    }
 
     if (ifStmtTestMode == IfStmtTestMode.VECTOR_EXPRESSION &&
         vectorExpression instanceof VectorUDFAdaptor) {
       System.out.println(
           "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() +
           " ifStmtTestMode " + ifStmtTestMode +
+          " ifVariation " + ifVariation +
           " columnScalarMode " + columnScalarMode +
           " vectorExpression " + vectorExpression.toString());
     }
@@ -472,31 +535,91 @@ private void doVectorIfTest(TypeInfo typeInfo,
 
     VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
 
-    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
-    resultVectorExtractRow.init(
-        new TypeInfo[] { typeInfo }, new int[] { vectorExpression.getOutputColumnNum() });
-    Object[] scrqtchRow = new Object[1];
-
     // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
 
     /*
     System.out.println(
         "*DEBUG* typeInfo " + typeInfo.toString() +
         " ifStmtTestMode " + ifStmtTestMode +
+        " ifVariation " + ifVariation +
        " columnScalarMode " + columnScalarMode +
        " vectorExpression " + vectorExpression.toString());
    */
 
+    VectorExtractRow resultVectorExtractRow = null;
+    Object[] scrqtchRow = null;
+    if (!isFilter) {
+      resultVectorExtractRow = new VectorExtractRow();
+      final int outputColumnNum = vectorExpression.getOutputColumnNum();
+      resultVectorExtractRow.init(
+          new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
+      scrqtchRow = new Object[1];
+    }
+
+    boolean copySelectedInUse = false;
+    int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+
     batchSource.resetBatchIteration();
     int rowIndex = 0;
     while (true) {
       if (!batchSource.fillNextBatch(batch)) {
         break;
       }
+      final int originalBatchSize = batch.size;
+      if (isFilter) {
+        copySelectedInUse = batch.selectedInUse;
+        if (batch.selectedInUse) {
+          System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
+        }
+      }
+
+      // In filter mode, the batch size can be made smaller.
       vectorExpression.evaluate(batch);
-      extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow,
-          typeInfo, resultObjects);
-      rowIndex += batch.size;
+
+      if (!isFilter) {
+        extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow,
+            objectInspector, resultObjects);
+      } else {
+        final int currentBatchSize = batch.size;
+        if (copySelectedInUse && batch.selectedInUse) {
+          int selectIndex = 0;
+          for (int i = 0; i < originalBatchSize; i++) {
+            final int originalBatchIndex = copySelected[i];
+            final boolean booleanResult;
+            if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
+              booleanResult = true;
+              selectIndex++;
+            } else {
+              booleanResult = false;
+            }
+            resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
+          }
+        } else if (batch.selectedInUse) {
+          int selectIndex = 0;
+          for (int i = 0; i < originalBatchSize; i++) {
+            final boolean booleanResult;
+            if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
+              booleanResult = true;
+              selectIndex++;
+            } else {
+              booleanResult = false;
+            }
+            resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
+          }
+        } else if (currentBatchSize == 0) {
+          // Whole batch got zapped.
+          for (int i = 0; i < originalBatchSize; i++) {
+            resultObjects[rowIndex + i] = new BooleanWritable(false);
+          }
+        } else {
+          // Every row kept.
+          for (int i = 0; i < originalBatchSize; i++) {
+            resultObjects[rowIndex + i] = new BooleanWritable(true);
+          }
+        }
+      }
+
+      rowIndex += originalBatchSize;
     }
   }
 }
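Why the test snapshots batch.selected above: in filter mode the expression produces no output column, it shrinks the batch by rewriting batch.selected and batch.size in place, so the per-row booleans have to be reconstructed by comparing the surviving entries against a copy of the pre-evaluation selection. A standalone sketch of that two-pointer scan (hypothetical helper names, not part of the patch; it relies on filters preserving the relative order of the selected array):

```java
public class FilterResultSketch {

  // Map the rows that survived a filter back to one boolean per original row.
  // originalSelected holds the row indices that entered the filter (in order);
  // survivingSelected holds the indices still selected after evaluate().
  static boolean[] survivorsToBooleans(int[] originalSelected, int originalSize,
      int[] survivingSelected, int survivingSize) {
    boolean[] results = new boolean[originalSize];
    int survivorIndex = 0;
    for (int i = 0; i < originalSize; i++) {
      if (survivorIndex < survivingSize
          && survivingSelected[survivorIndex] == originalSelected[i]) {
        results[i] = true;   // Row passed the filter.
        survivorIndex++;
      } else {
        results[i] = false;  // Row was dropped (predicate false or NULL).
      }
    }
    return results;
  }
}
```

The remaining hunks are regenerated q.out golden files: with FILTER-mode IF/CASE now vectorizable, several reducers and map works flip to "Execution mode: vectorized" and operator IDs shift accordingly.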
diff --git ql/src/test/results/clientpositive/fold_case.q.out ql/src/test/results/clientpositive/fold_case.q.out
index 320752b..438829b 100644
--- ql/src/test/results/clientpositive/fold_case.q.out
+++ ql/src/test/results/clientpositive/fold_case.q.out
@@ -443,6 +443,7 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index d24bd0a..f5f5f36 100644
--- ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -1489,7 +1489,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 41 Data size: 7544 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
diff --git ql/src/test/results/clientpositive/perf/spark/query21.q.out ql/src/test/results/clientpositive/perf/spark/query21.q.out
index 053f597..1673061 100644
--- ql/src/test/results/clientpositive/perf/spark/query21.q.out
+++ ql/src/test/results/clientpositive/perf/spark/query21.q.out
@@ -212,6 +212,7 @@ STAGE PLANS:
                   Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col2 (type: bigint), _col3 (type: bigint)
         Reducer 3 
+            Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1)
diff --git ql/src/test/results/clientpositive/perf/spark/query34.q.out ql/src/test/results/clientpositive/perf/spark/query34.q.out
index b40081e..ca3e3ac 100644
--- ql/src/test/results/clientpositive/perf/spark/query34.q.out
+++ ql/src/test/results/clientpositive/perf/spark/query34.q.out
@@ -85,6 +85,7 @@ STAGE PLANS:
                         keys:
                           0 _col2 (type: int)
                           1 _col0 (type: int)
+            Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
         Map 9 
diff --git ql/src/test/results/clientpositive/perf/spark/query73.q.out ql/src/test/results/clientpositive/perf/spark/query73.q.out
index 20ec874..93f16db 100644
--- ql/src/test/results/clientpositive/perf/spark/query73.q.out
+++ ql/src/test/results/clientpositive/perf/spark/query73.q.out
@@ -99,6 +99,7 @@ STAGE PLANS:
                         keys:
                           0 _col2 (type: int)
                           1 _col0 (type: int)
+            Execution mode: vectorized
             Local Work:
              Map Reduce Local Work
         Map 8 
diff --git ql/src/test/results/clientpositive/perf/spark/query89.q.out ql/src/test/results/clientpositive/perf/spark/query89.q.out
index 1acc577..10f38c2 100644
--- ql/src/test/results/clientpositive/perf/spark/query89.q.out
+++ ql/src/test/results/clientpositive/perf/spark/query89.q.out
@@ -219,6 +219,7 @@ STAGE PLANS:
                   Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2))
         Reducer 5 
+            Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2))
diff --git ql/src/test/results/clientpositive/perf/tez/query21.q.out ql/src/test/results/clientpositive/perf/tez/query21.q.out
index 9c28522..377c20a 100644
--- ql/src/test/results/clientpositive/perf/tez/query21.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query21.q.out
@@ -70,16 +70,16 @@ Stage-0
     limit:100
     Stage-1
       Reducer 6 vectorized
-      File Output Operator [FS_93]
-        Limit [LIM_92] (rows=100 width=15)
+      File Output Operator [FS_95]
+        Limit [LIM_94] (rows=100 width=15)
           Number of rows:100
-          Select Operator [SEL_91] (rows=12506076 width=15)
+          Select Operator [SEL_93] (rows=12506076 width=15)
             Output:["_col0","_col1","_col2","_col3"]
-          <-Reducer 5 [SIMPLE_EDGE]
-            SHUFFLE [RS_28]
-              Filter Operator [FIL_26] (rows=12506076 width=15)
+          <-Reducer 5 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_92]
+              Filter Operator [FIL_91] (rows=12506076 width=15)
                 predicate:CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END
-                Group By Operator [GBY_25] (rows=25012152 width=15)
+                Group By Operator [GBY_90] (rows=25012152 width=15)
                   Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1
                 <-Reducer 4 [SIMPLE_EDGE]
                   SHUFFLE [RS_24]
diff --git ql/src/test/results/clientpositive/perf/tez/query34.q.out ql/src/test/results/clientpositive/perf/tez/query34.q.out
index 9b7b482..c1cdde0 100644
--- ql/src/test/results/clientpositive/perf/tez/query34.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query34.q.out
@@ -78,15 +78,15 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 3 vectorized
-      File Output Operator [FS_135]
-        Select Operator [SEL_134] (rows=88000001 width=860)
+      File Output Operator [FS_141]
+        Select Operator [SEL_140] (rows=88000001 width=860)
          Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
        <-Reducer 2 [SIMPLE_EDGE]
          SHUFFLE [RS_35]
            Select Operator [SEL_34] (rows=88000001 width=860)
              Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
              Merge Join Operator [MERGEJOIN_100] (rows=88000001 width=860)
-               Conds:RS_103._col0=RS_133._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"]
+               Conds:RS_103._col0=RS_139._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"]
              <-Map 1 [SIMPLE_EDGE] vectorized
                PARTITION_ONLY_SHUFFLE [RS_103]
                  PartitionCols:_col0
@@ -97,13 +97,13 @@ Stage-0
                  TableScan [TS_0] (rows=80000000 width=860)
                    default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"]
              <-Reducer 9 [SIMPLE_EDGE] vectorized
-               SHUFFLE [RS_133]
+               SHUFFLE [RS_139]
                  PartitionCols:_col1
-                 Filter Operator [FIL_132] (rows=42591679 width=88)
+                 Filter Operator [FIL_138] (rows=42591679 width=88)
                    predicate:_col2 BETWEEN 15 AND 20
-                   Select Operator [SEL_131] (rows=383325119 width=88)
+                   Select Operator [SEL_137] (rows=383325119 width=88)
                      Output:["_col0","_col1","_col2"]
-                     Group By Operator [GBY_130] (rows=383325119 width=88)
+                     Group By Operator [GBY_136] (rows=383325119 width=88)
                        Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
                      <-Reducer 8 [SIMPLE_EDGE]
                        SHUFFLE [RS_26]
@@ -111,13 +111,13 @@ Stage-0
                          Group By Operator [GBY_25] (rows=766650239 width=88)
                            Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4
                            Merge Join Operator [MERGEJOIN_99] (rows=766650239 width=88)
-                             Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4"]
+                             Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col1","_col4"]
                            <-Map 14 [SIMPLE_EDGE] vectorized
-                             SHUFFLE [RS_119]
+                             SHUFFLE [RS_125]
                                PartitionCols:_col0
-                               Select Operator [SEL_118] (rows=852 width=1910)
+                               Select Operator [SEL_124] (rows=852 width=1910)
                                  Output:["_col0"]
-                                 Filter Operator [FIL_117] (rows=852 width=1910)
+                                 Filter Operator [FIL_123] (rows=852 width=1910)
                                    predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null)
                                    TableScan [TS_12] (rows=1704 width=1910)
                                      default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county"]
@@ -125,13 +125,13 @@ Stage-0
                              SHUFFLE [RS_21]
                                PartitionCols:_col3
                                Merge Join Operator [MERGEJOIN_98] (rows=696954748 width=88)
-                                 Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col4"]
-                               <-Map 12 [SIMPLE_EDGE]
-                                 SHUFFLE [RS_19]
+                                 Conds:RS_18._col2=RS_117._col0(Inner),Output:["_col1","_col3","_col4"]
+                               <-Map 12 [SIMPLE_EDGE] vectorized
+                                 SHUFFLE [RS_117]
                                    PartitionCols:_col0
-                                   Select Operator [SEL_11] (rows=1200 width=107)
+                                   Select Operator [SEL_116] (rows=1200 width=107)
                                      Output:["_col0"]
-                                     Filter Operator [FIL_55] (rows=1200 width=107)
+                                     Filter Operator [FIL_115] (rows=1200 width=107)
                                        predicate:(((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null)
                                        TableScan [TS_9] (rows=7200 width=107)
                                          default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"]
@@ -139,7 +139,7 @@ Stage-0
                                  SHUFFLE [RS_18]
                                    PartitionCols:_col2
                                    Merge Join Operator [MERGEJOIN_97] (rows=633595212 width=88)
-                                     Conds:RS_129._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+                                     Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
                                    <-Map 10 [SIMPLE_EDGE] vectorized
                                      SHUFFLE [RS_109]
                                        PartitionCols:_col0
@@ -150,11 +150,11 @@ Stage-0
                                        TableScan [TS_6] (rows=73049 width=1119)
                                          default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
                                    <-Map 5 [SIMPLE_EDGE] vectorized
-                                     SHUFFLE [RS_129]
+                                     SHUFFLE [RS_135]
                                        PartitionCols:_col0
-                                       Select Operator [SEL_128] (rows=575995635 width=88)
+                                       Select Operator [SEL_134] (rows=575995635 width=88)
                                          Output:["_col0","_col1","_col2","_col3","_col4"]
-                                         Filter Operator [FIL_127] (rows=575995635 width=88)
+                                         Filter Operator [FIL_133] (rows=575995635 width=88)
                                            predicate:((ss_customer_sk BETWEEN DynamicValue(RS_31_customer_c_customer_sk_min) AND DynamicValue(RS_31_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_31_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
                                            TableScan [TS_3] (rows=575995635 width=88)
                                              default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"]
@@ -170,30 +170,30 @@ Stage-0
                                                Output:["_col0"]
                                                 Please refer to the previous Select Operator [SEL_108]
                                    <-Reducer 13 [BROADCAST_EDGE] vectorized
-                                     BROADCAST [RS_116]
-                                       Group By Operator [GBY_115] (rows=1 width=12)
+                                     BROADCAST [RS_122]
+                                       Group By Operator [GBY_121] (rows=1 width=12)
                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                       <-Map 12 [CUSTOM_SIMPLE_EDGE]
-                                         SHUFFLE [RS_69]
-                                           Group By Operator [GBY_68] (rows=1 width=12)
+                                       <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
+                                         SHUFFLE [RS_120]
+                                           Group By Operator [GBY_119] (rows=1 width=12)
                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                             Select Operator [SEL_67] (rows=1200 width=107)
+                                             Select Operator [SEL_118] (rows=1200 width=107)
                                                Output:["_col0"]
-                                                Please refer to the previous Select Operator [SEL_11]
+                                                Please refer to the previous Select Operator [SEL_116]
                                    <-Reducer 15 [BROADCAST_EDGE] vectorized
-                                     BROADCAST [RS_124]
-                                       Group By Operator [GBY_123] (rows=1 width=12)
+                                     BROADCAST [RS_130]
+                                       Group By Operator [GBY_129] (rows=1 width=12)
                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
                                        <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
-                                         SHUFFLE [RS_122]
-                                           Group By Operator [GBY_121] (rows=1 width=12)
+                                         SHUFFLE [RS_128]
+                                           Group By Operator [GBY_127] (rows=1 width=12)
                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                             Select Operator [SEL_120] (rows=852 width=1910)
+                                             Select Operator [SEL_126] (rows=852 width=1910)
                                                Output:["_col0"]
-                                                Please refer to the previous Select Operator [SEL_118]
+                                                Please refer to the previous Select Operator [SEL_124]
                                    <-Reducer 4 [BROADCAST_EDGE] vectorized
-                                     BROADCAST [RS_126]
-                                       Group By Operator [GBY_125] (rows=1 width=12)
+                                     BROADCAST [RS_132]
+                                       Group By Operator [GBY_131] (rows=1 width=12)
                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"]
                                        <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
                                          PARTITION_ONLY_SHUFFLE [RS_106]
diff --git ql/src/test/results/clientpositive/perf/tez/query73.q.out ql/src/test/results/clientpositive/perf/tez/query73.q.out
index cfa5213..39ad200 100644
--- ql/src/test/results/clientpositive/perf/tez/query73.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query73.q.out
@@ -72,15 +72,15 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 3 vectorized
-      File Output Operator [FS_135]
-        Select Operator [SEL_134] (rows=88000001 width=860)
+      File Output Operator [FS_141]
+        Select Operator [SEL_140] (rows=88000001 width=860)
          Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
        <-Reducer 2 [SIMPLE_EDGE]
          SHUFFLE [RS_35]
            Select Operator [SEL_34] (rows=88000001 width=860)
              Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
              Merge Join Operator [MERGEJOIN_100] (rows=88000001 width=860)
-               Conds:RS_103._col0=RS_133._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"]
+               Conds:RS_103._col0=RS_139._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"]
              <-Map 1 [SIMPLE_EDGE] vectorized
                PARTITION_ONLY_SHUFFLE [RS_103]
                  PartitionCols:_col0
@@ -91,13 +91,13 @@ Stage-0
                  TableScan [TS_0] (rows=80000000 width=860)
                    default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"]
              <-Reducer 9 [SIMPLE_EDGE] vectorized
-               SHUFFLE [RS_133]
+               SHUFFLE [RS_139]
                  PartitionCols:_col1
-                 Filter Operator [FIL_132] (rows=42591679 width=88)
+                 Filter Operator [FIL_138] (rows=42591679 width=88)
                    predicate:_col2 BETWEEN 1 AND 5
-                   Select Operator [SEL_131] (rows=383325119 width=88)
+                   Select Operator [SEL_137] (rows=383325119 width=88)
                      Output:["_col0","_col1","_col2"]
-                     Group By Operator [GBY_130] (rows=383325119 width=88)
+                     Group By Operator [GBY_136] (rows=383325119 width=88)
                        Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
                      <-Reducer 8 [SIMPLE_EDGE]
                        SHUFFLE [RS_26]
@@ -105,13 +105,13 @@ Stage-0
                          Group By Operator [GBY_25] (rows=766650239 width=88)
                            Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4
                            Merge Join Operator [MERGEJOIN_99] (rows=766650239 width=88)
-                             Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4"]
+                             Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col1","_col4"]
                            <-Map 14 [SIMPLE_EDGE] vectorized
-                             SHUFFLE [RS_119]
+                             SHUFFLE [RS_125]
                                PartitionCols:_col0
-                               Select Operator [SEL_118] (rows=852 width=1910)
+                               Select Operator [SEL_124] (rows=852 width=1910)
                                  Output:["_col0"]
-                                 Filter Operator [FIL_117] (rows=852 width=1910)
+                                 Filter Operator [FIL_123] (rows=852 width=1910)
                                    predicate:((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null)
                                    TableScan [TS_12] (rows=1704 width=1910)
                                      default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county"]
@@ -119,13 +119,13 @@ Stage-0
                              SHUFFLE [RS_21]
                                PartitionCols:_col3
                                Merge Join Operator [MERGEJOIN_98] (rows=696954748 width=88)
-                                 Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col4"]
-                               <-Map 12 [SIMPLE_EDGE]
-                                 SHUFFLE [RS_19]
+                                 Conds:RS_18._col2=RS_117._col0(Inner),Output:["_col1","_col3","_col4"]
+                               <-Map 12 [SIMPLE_EDGE] vectorized
+                                 SHUFFLE [RS_117]
                                    PartitionCols:_col0
-                                   Select Operator [SEL_11] (rows=1200 width=107)
+                                   Select Operator [SEL_116] (rows=1200 width=107)
                                      Output:["_col0"]
-                                     Filter Operator [FIL_55] (rows=1200 width=107)
+                                     Filter Operator [FIL_115] (rows=1200 width=107)
                                        predicate:(((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null)
                                        TableScan [TS_9] (rows=7200 width=107)
                                          default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"]
@@ -133,7 +133,7 @@ Stage-0
                                  SHUFFLE [RS_18]
                                    PartitionCols:_col2
                                    Merge Join Operator [MERGEJOIN_97] (rows=633595212 width=88)
-                                     Conds:RS_129._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+                                     Conds:RS_135._col0=RS_109._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
                                    <-Map 10 [SIMPLE_EDGE] vectorized
                                      SHUFFLE [RS_109]
                                        PartitionCols:_col0
@@ -144,11 +144,11 @@ Stage-0
                                        TableScan [TS_6] (rows=73049 width=1119)
                                          default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
                                    <-Map 5 [SIMPLE_EDGE] vectorized
-                                     SHUFFLE [RS_129]
+                                     SHUFFLE [RS_135]
                                        PartitionCols:_col0
-                                       Select Operator [SEL_128] (rows=575995635 width=88)
+                                       Select Operator [SEL_134] (rows=575995635 width=88)
                                          Output:["_col0","_col1","_col2","_col3","_col4"]
-                                         Filter Operator [FIL_127] (rows=575995635 width=88)
+                                         Filter Operator [FIL_133] (rows=575995635 width=88)
                                            predicate:((ss_customer_sk BETWEEN DynamicValue(RS_31_customer_c_customer_sk_min) AND DynamicValue(RS_31_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_31_customer_c_customer_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
                                            TableScan [TS_3] (rows=575995635 width=88)
                                              default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"]
@@ -164,30 +164,30 @@ Stage-0
                                                Output:["_col0"]
                                                 Please refer to the previous Select Operator [SEL_108]
                                    <-Reducer 13 [BROADCAST_EDGE] vectorized
-                                     BROADCAST [RS_116]
-                                       Group By Operator [GBY_115] (rows=1 width=12)
+                                     BROADCAST [RS_122]
+                                       Group By Operator [GBY_121] (rows=1 width=12)
                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                       <-Map 12 [CUSTOM_SIMPLE_EDGE]
-                                         SHUFFLE [RS_69]
-                                           Group By Operator [GBY_68] (rows=1 width=12)
+                                       <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
+                                         SHUFFLE [RS_120]
+                                           Group By Operator [GBY_119] (rows=1 width=12)
                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                             Select Operator [SEL_67] (rows=1200 width=107)
+                                             Select Operator [SEL_118] (rows=1200 width=107)
                                                Output:["_col0"]
-                                                Please refer to the previous Select Operator [SEL_11]
+                                                Please refer to the previous Select Operator [SEL_116]
                                    <-Reducer 15 [BROADCAST_EDGE] vectorized
-                                     BROADCAST [RS_124]
-                                       Group By Operator [GBY_123] (rows=1 width=12)
+                                     BROADCAST [RS_130]
+                                       Group By Operator [GBY_129] (rows=1 width=12)
                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
                                        <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized
-                                         SHUFFLE [RS_122]
-                                           Group By Operator [GBY_121] (rows=1 width=12)
+                                         SHUFFLE [RS_128]
+                                           Group By Operator [GBY_127] (rows=1 width=12)
                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                             Select Operator [SEL_120] (rows=852 width=1910)
+                                             Select Operator [SEL_126] (rows=852 width=1910)
                                                Output:["_col0"]
-                                                Please refer to the previous Select Operator [SEL_118]
+                                                Please refer to the previous Select Operator [SEL_124]
                                    <-Reducer 4 [BROADCAST_EDGE] vectorized
-                                     BROADCAST [RS_126]
-                                       Group By Operator [GBY_125] (rows=1 width=12)
+                                     BROADCAST [RS_132]
+                                       Group By Operator [GBY_131] (rows=1 width=12)
                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"]
                                        <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
                                          PARTITION_ONLY_SHUFFLE [RS_106]
diff --git ql/src/test/results/clientpositive/perf/tez/query89.q.out ql/src/test/results/clientpositive/perf/tez/query89.q.out
index ee3374e..fccc653 100644
--- ql/src/test/results/clientpositive/perf/tez/query89.q.out
+++ ql/src/test/results/clientpositive/perf/tez/query89.q.out
@@ -71,22 +71,22 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 7 vectorized
-      File Output Operator [FS_119]
-        Limit [LIM_118] (rows=100 width=88)
+      File Output Operator [FS_122]
+        Limit [LIM_121] (rows=100 width=88)
           Number of rows:100
-          Select Operator [SEL_117] (rows=191662559 width=88)
+          Select Operator [SEL_120] (rows=191662559 width=88)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
-        <-Reducer 6 [SIMPLE_EDGE]
-          SHUFFLE [RS_33]
-            Select Operator [SEL_30] (rows=191662559 width=88)
+        <-Reducer 6 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_119]
+            Select Operator [SEL_118] (rows=191662559 width=88)
               Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-              Filter Operator [FIL_47] (rows=191662559 width=88)
+              Filter Operator [FIL_117] (rows=191662559 width=88)
                 predicate:CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END
-                Select Operator [SEL_29] (rows=383325119 width=88)
+                Select Operator [SEL_116] (rows=383325119 width=88)
                   Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
-                  PTF Operator [PTF_28] (rows=383325119 width=88)
+                  PTF Operator [PTF_115] (rows=383325119 width=88)
                     Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}]
-                    Select Operator [SEL_27] (rows=383325119 width=88)
+                    Select Operator [SEL_114] (rows=383325119 width=88)
                       Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
                     <-Reducer 5 [SIMPLE_EDGE] vectorized
                       SHUFFLE [RS_113]
diff --git ql/src/test/results/clientpositive/spark/subquery_notin.q.out ql/src/test/results/clientpositive/spark/subquery_notin.q.out
index 216bc0a..9615450 100644
--- ql/src/test/results/clientpositive/spark/subquery_notin.q.out
+++ ql/src/test/results/clientpositive/spark/subquery_notin.q.out
@@ -1517,6 +1517,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Join Operator