diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 05c2acd..03d5f7b 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2947,7 +2947,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "internal use only, used for creating small group key vectorized row batches to exercise more logic\n" +
         "The default value is -1 which means don't restrict for testing",
         true),
-
+    HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS("hive.vectorized.reuse.scratch.columns", true,
+        "internal use only. Disable this to debug scratch column state issues",
+        true),
     HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED("hive.vectorized.complex.types.enabled", true,
         "This flag should be set to true to enable vectorization\n" +
         "of expressions with complex types.\n" +
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 53da72b..b39c25e 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -700,6 +700,7 @@ minillaplocal.query.files=\
   vector_number_compare_projection.q,\
   vector_partitioned_date_time.q,\
   vector_ptf_part_simple.q,\
+  vector_reuse_scratchcols.q,\
  vector_udf1.q,\
  vector_windowing.q,\
  vector_windowing_expressions.q,\
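Note for reviewers: a minimal sketch, not part of the patch, of how the new ConfVars entry is read and toggled through the existing HiveConf accessors (getBoolVar/setBoolVar are the standard static helpers; the demo class name is made up):

// Illustrative only -- exercises the new flag via the standard HiveConf accessors.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class ReuseScratchColumnsFlagDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Defaults to true: scratch column reuse stays on unless explicitly disabled.
    boolean reuse = HiveConf.getBoolVar(conf,
        ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS);
    System.out.println("scratch column reuse: " + reuse);
    // Turn reuse off when debugging scratch column state issues.
    HiveConf.setBoolVar(conf,
        ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS, false);
  }
}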
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index d4974cc..d46eb8d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;

 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
@@ -132,12 +133,20 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) {

   private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode;

+  private boolean reuseScratchColumns =
+      HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS.defaultBoolVal;
+
   private void setHiveConfVars(HiveConf hiveConf) {
     hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf);
+    this.reuseScratchColumns =
+        HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS);
+    this.ocm.setReuseColumns(reuseScratchColumns);
   }

   private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
     hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode;
+    this.reuseScratchColumns = vContextEnvironment.reuseScratchColumns;
+    this.ocm.setReuseColumns(reuseScratchColumns);
   }

   // Convenient constructor for initial batch creation takes
@@ -265,9 +274,11 @@ public void addInitialColumn(String columnName) {

   // Finishes the vectorization context after all the initial
   // columns have been added.
+  @VisibleForTesting
   public void finishedAddingInitialColumns() {
     int firstOutputColumnIndex = projectedColumns.size();
     this.ocm = new OutputColumnManager(firstOutputColumnIndex);
+    this.ocm.setReuseColumns(this.reuseScratchColumns);
     this.firstOutputColumnIndex = firstOutputColumnIndex;
   }

@@ -392,7 +403,7 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) thr
   public static final Pattern mapTypePattern = Pattern.compile("map.*",
       Pattern.CASE_INSENSITIVE);

-  //Map column number to type
+  // Map column number to type (this is always non-null for a useful vectorization context)
   private OutputColumnManager ocm;

   // Set of UDF classes for type casting data types in row-mode.
@@ -502,6 +513,7 @@ protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) throws HiveExcepti
   private static class OutputColumnManager {
     private final int initialOutputCol;
     private int outputColCount = 0;
+    private boolean reuseScratchColumns = true;

     protected OutputColumnManager(int initialOutputCol) {
       this.initialOutputCol = initialOutputCol;
@@ -569,7 +581,7 @@ private int allocateOutputColumnInternal(String columnType, DataTypePhysicalVari
     }

     void freeOutputColumn(int index) {
-      if (initialOutputCol < 0) {
+      if (initialOutputCol < 0 || !reuseScratchColumns) {
         // This is a test
         return;
       }
@@ -597,6 +609,12 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) {
       }
       return scratchDataTypePhysicalVariations[columnNum - initialOutputCol];
     }
+
+    // Allow debugging by disabling column reuse (input columns are never reused by design;
+    // only scratch columns are).
+    public void setReuseColumns(boolean reuseColumns) {
+      this.reuseScratchColumns = reuseColumns;
+    }
   }

   public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException {
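The behavioral core of the change, condensed: OutputColumnManager hands out scratch columns and, when reuse is on, recycles freed ones from a free list. Below is a minimal self-contained sketch of that mechanism — illustrative, not the actual Hive class; the real manager also keys the free list by column type and physical variation, which is omitted here:

// Illustrative sketch of scratch column pooling with a reuse switch.
import java.util.ArrayDeque;
import java.util.Deque;

class ScratchColumnPool {
  private final int firstScratchCol;
  private int nextCol;
  private final Deque<Integer> freeList = new ArrayDeque<>();
  private boolean reuseScratchColumns = true;

  ScratchColumnPool(int firstScratchCol) {
    this.firstScratchCol = firstScratchCol;
    this.nextCol = firstScratchCol;
  }

  void setReuseColumns(boolean reuse) {
    this.reuseScratchColumns = reuse;
  }

  int allocate() {
    // Hand back a previously freed scratch column when reuse is allowed.
    if (reuseScratchColumns && !freeList.isEmpty()) {
      return freeList.pop();
    }
    return nextCol++; // otherwise grow the scratch area with a fresh column
  }

  void free(int col) {
    // Input columns are never pooled; with reuse off, freeing is a no-op.
    if (col < firstScratchCol || !reuseScratchColumns) {
      return;
    }
    freeList.push(col);
  }
}

With reuse off, allocate() never consults the free list, so every expression gets a distinct column. That is exactly why the second plan in the q.out below declares five scratch column types where the first needs only three.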
diff --git ql/src/test/queries/clientpositive/vector_reuse_scratchcols.q ql/src/test/queries/clientpositive/vector_reuse_scratchcols.q
new file mode 100644
index 0000000..e863984
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_reuse_scratchcols.q
@@ -0,0 +1,79 @@
+set hive.vectorized.execution.enabled=true;
+
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * (-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM alltypesorc
+WHERE ((762 = cbigint)
+       OR ((csmallint < cfloat)
+           AND ((ctimestamp2 > -5)
+                AND (cdouble != cint)))
+       OR (cstring1 = 'a')
+       OR ((cbigint <= -1.389)
+           AND ((cstring2 != 'a')
+                AND ((79.553 != cint)
+                     AND (cboolean2 != cboolean1)))));
+
+set hive.vectorized.reuse.scratch.columns=false;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * (-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM alltypesorc
+WHERE ((762 = cbigint)
+       OR ((csmallint < cfloat)
+           AND ((ctimestamp2 > -5)
+                AND (cdouble != cint)))
+       OR (cstring1 = 'a')
+       OR ((cbigint <= -1.389)
+           AND ((cstring2 != 'a')
+                AND ((79.553 != cint)
+                     AND (cboolean2 != cboolean1)))));
+
diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
new file mode 100644
index 0000000..6528b6f
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * (-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM alltypesorc
+WHERE ((762 = cbigint)
+       OR ((csmallint < cfloat)
+           AND ((ctimestamp2 > -5)
+                AND (cdouble != cint)))
+       OR (cstring1 = 'a')
+       OR ((cbigint <= -1.389)
+           AND ((cstring2 != 'a')
+                AND ((79.553 != cint)
+                     AND (cboolean2 != cboolean1)))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * (-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM alltypesorc
+WHERE ((762 = cbigint)
+       OR ((csmallint < cfloat)
+           AND ((ctimestamp2 > -5)
+                AND (cdouble != cint)))
+       OR (cstring1 = 'a')
+       OR ((cbigint <= -1.389)
+           AND ((cstring2 != 'a')
+                AND ((79.553 != cint)
+                     AND (cboolean2 != cboolean1)))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3:bigint), FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), FilterDoubleColGreaterDoubleScalar(col 13:double, val -5.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 2:int) -> 13:double)), FilterStringGroupColEqualStringScalar(col 6:string, val a), FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 14:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val a), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 15:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 15:decimal(13,3)), FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)))
+                    predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (762 = cbigint) or (cstring1 = 'a')) (type: boolean)
+                    Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 2, 4, 5]
+                      Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                        Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                          Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [double, decimal(22,3), decimal(13,3)]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    dataColumns: VALUE._col0:struct, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:struct, VALUE._col5:struct, VALUE._col6:struct, VALUE._col7:tinyint, VALUE._col8:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 0:struct) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct) -> double, VectorUDAFVarFinal(col 6:struct) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
+                      selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 23:double) -> 25:double
+                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * (-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM alltypesorc
+WHERE ((762 = cbigint)
+       OR ((csmallint < cfloat)
+           AND ((ctimestamp2 > -5)
+                AND (cdouble != cint)))
+       OR (cstring1 = 'a')
+       OR ((cbigint <= -1.389)
+           AND ((cstring2 != 'a')
+                AND ((79.553 != cint)
+                     AND (cboolean2 != cboolean1)))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(cint),
+       (AVG(cint) + -3728),
+       (-((AVG(cint) + -3728))),
+       (-((-((AVG(cint) + -3728))))),
+       ((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)),
+       SUM(cdouble),
+       (-(AVG(cint))),
+       STDDEV_POP(cint),
+       (((-((-((AVG(cint) + -3728))))) * (AVG(cint) + -3728)) * (-((-((AVG(cint) + -3728)))))),
+       STDDEV_SAMP(csmallint),
+       (-(STDDEV_POP(cint))),
+       (STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))),
+       ((STDDEV_POP(cint) - (-((-((AVG(cint) + -3728)))))) * STDDEV_POP(cint)),
+       VAR_SAMP(cint),
+       AVG(cfloat),
+       (10.175 - VAR_SAMP(cint)),
+       (-((10.175 - VAR_SAMP(cint)))),
+       ((-(STDDEV_POP(cint))) / -563),
+       STDDEV_SAMP(cint),
+       (-(((-(STDDEV_POP(cint))) / -563))),
+       (AVG(cint) / SUM(cdouble)),
+       MIN(ctinyint),
+       COUNT(csmallint),
+       (MIN(ctinyint) / ((-(STDDEV_POP(cint))) / -563)),
+       (-((AVG(cint) / SUM(cdouble))))
+FROM alltypesorc
+WHERE ((762 = cbigint)
+       OR ((csmallint < cfloat)
+           AND ((ctimestamp2 > -5)
+                AND (cdouble != cint)))
+       OR (cstring1 = 'a')
+       OR ((cbigint <= -1.389)
+           AND ((cstring2 != 'a')
+                AND ((79.553 != cint)
+                     AND (cboolean2 != cboolean1)))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3:bigint), FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), FilterDoubleColGreaterDoubleScalar(col 14:double, val -5.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDoubleColNotEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 15:double)), FilterStringGroupColEqualStringScalar(col 6:string, val a), FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 16:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 16:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val a), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 17:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 17:decimal(13,3)), FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)))
+                    predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (762 = cbigint) or (cstring1 = 'a')) (type: boolean)
+                    Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 2, 4, 5]
+                      Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                        Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                          Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [double, double, double, decimal(22,3), decimal(13,3)]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    dataColumns: VALUE._col0:struct, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:struct, VALUE._col5:struct, VALUE._col6:struct, VALUE._col7:tinyint, VALUE._col8:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 0:struct) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct) -> double, VectorUDAFVarFinal(col 6:struct) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 9, 11, 14, 19, 1, 20, 2, 29, 3, 30, 34, 39, 4, 5, 40, 42, 44, 6, 47, 48, 7, 8, 52, 54]
+                      selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 17:double, col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double) -> 17:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 0:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 25:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 21:double) -> 22:double) -> 23:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 24:double) -> 25:double, DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 26:double) -> 27:double) -> 28:double) -> 29:double, DoubleColUnaryMinus(col 2:double) -> 30:double, DoubleColSubtractDoubleColumn(col 2:double, col 33:double)(children: DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 31:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DoubleColMultiplyDoubleColumn(col 38:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColUnaryMinus(col 35:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 35:double) -> 36:double) -> 37:double) -> 38:double) -> 39:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 40:double, DoubleColUnaryMinus(col 41:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 41:double) -> 42:double, DoubleColDivideDoubleScalar(col 43:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 43:double) -> 44:double, DoubleColUnaryMinus(col 46:double)(children: DoubleColDivideDoubleScalar(col 45:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 45:double) -> 46:double) -> 47:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 48:double, DoubleColDivideDoubleColumn(col 49:double, col 51:double)(children: CastLongToDouble(col 7:tinyint) -> 49:double, DoubleColDivideDoubleScalar(col 50:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 50:double) -> 51:double) -> 52:double, DoubleColUnaryMinus(col 53:double)(children: DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 53:double) -> 54:double
+                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
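Reading the two plans against each other shows the flag doing its job. With reuse enabled, the map-side filter cycles a single double scratch column (col 13) through three casts and the vertex declares scratchColumnTypeNames: [double, decimal(22,3), decimal(13,3)]; with reuse disabled, each cast gets its own column (13, 14, 15), the reducer's Select climbs to column 54, and five scratch types are declared. The plans are otherwise identical, which is what makes this diff a usable signal when debugging scratch column state.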