diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 1ec5774178..cc676c55f8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -42,7 +42,6 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -68,6 +67,7 @@
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.DefaultFetchFormatter;
 import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
@@ -388,9 +388,7 @@ private String extractTableFullName(StatsTask tsk) throws SemanticException {
       TableSpec tableSpec = new TableSpec(table, partitions);
       tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
 
-      // Note: this should probably use BasicStatsNoJobTask.canUseFooterScan, but it doesn't check
-      // Parquet for some reason. I'm keeping the existing behavior for now.
-      if (inputFormat.equals(OrcInputFormat.class) && !AcidUtils.isTransactionalTable(table)) {
-        // For ORC, there is no Tez Job for table stats.
+      if (BasicStatsNoJobTask.canUseFooterScan(table, inputFormat)) {
+        // For ORC and Parquet, there is no Tez job for table stats; they are read from file footers.
         StatsWork columnStatsWork = new StatsWork(table, parseContext.getConf());
         columnStatsWork.setFooterScan();
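Reviewer note: the hunk above resolves the old TODO by delegating the footer-scan decision to the helper it names. For review context only, a minimal sketch of the semantics this patch appears to rely on; this is a hypothetical body, not the actual Hive source, which lives in org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask. It assumes the helper simply extends the deleted ORC-only condition to Parquet (MapredParquetInputFormat), which is what the golden-file changes below suggest:

  // Sketch, assuming the helper mirrors the removed check plus Parquet.
  public static boolean canUseFooterScan(
      Table table, Class<? extends InputFormat> inputFormat) {
    // Footer-readable formats store row count and raw data size in file metadata.
    boolean footerFormat = inputFormat.equals(OrcInputFormat.class)
        || inputFormat.equals(MapredParquetInputFormat.class);
    // Transactional tables still need a real stats job (delta files, compaction).
    return footerFormat && !AcidUtils.isTransactionalTable(table);
  }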
diff --git ql/src/test/results/clientpositive/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
index e29249058e..833f31e982 100644
--- ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
+++ ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
@@ -1693,7 +1693,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -1715,7 +1715,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30518,7 +30518,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30540,7 +30540,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30635,7 +30635,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30657,7 +30657,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30756,7 +30756,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30778,7 +30778,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30862,7 +30862,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30884,7 +30884,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
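A note on the golden-file churn above and below: the old value 147456 is exactly numRows x column count (12288 x 12 = 147456), which suggests it was a flat per-column placeholder rather than a measured size, while 593751 is the rawDataSize the footer scan now reads from the Parquet file metadata. Every downstream Statistics: line rescales from that new base.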
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index 738f19ac57..de2b661e14 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -38,7 +38,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesparquet
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Select Operator
@@ -48,7 +48,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
                   Group By Vectorization:
@@ -196,7 +196,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesparquet
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Select Operator
@@ -206,7 +206,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: sum(ctinyint)
                   Group By Vectorization:
@@ -363,11 +363,11 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
              alias: alltypesparquet
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
                  mode: hash
@@ -506,7 +506,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesparquet
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Select Operator
@@ -516,7 +516,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [3]
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: min(cbigint), max(cbigint), count(cbigint), count()
                   Group By Vectorization:
@@ -664,7 +664,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesparquet
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Select Operator
@@ -674,7 +674,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [3]
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: sum(cbigint)
                   Group By Vectorization:
@@ -831,11 +831,11 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
              alias: alltypesparquet
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
                  mode: hash
@@ -974,7 +974,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesparquet
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Select Operator
@@ -984,7 +984,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [4]
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: min(cfloat), max(cfloat), count(cfloat), count()
                   Group By Vectorization:
@@ -1132,7 +1132,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesparquet
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Select Operator
@@ -1142,7 +1142,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [4]
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: sum(cfloat)
                   Group By Vectorization:
@@ -1299,11 +1299,11 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
              alias: alltypesparquet
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double)
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
                  mode: hash
@@ -1481,7 +1481,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null)) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -1490,7 +1490,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), SelectColumnIsTrue(col 17:boolean)(children: VectorUDFAdaptor(((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null))(children: LongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 15:boolean, LongColEqualLongScalar(col 11:boolean, val 1) -> 16:boolean) -> 17:boolean))
                 predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and null) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -1499,7 +1499,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumnNums: [3, 4, 0, 18, 21]
                       selectExpressions: CastLongToDouble(col 3:bigint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 3:bigint) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double
-                  Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
                     Group By Vectorization:
@@ -1668,14 +1668,14 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
              GatherStats: false
              Filter Operator
                isSamplingPred: false
                predicate: (((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0)) or (cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%')) (type: boolean)
-               Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                Select Operator
-                 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    aggregations: count()
                    mode: hash
@@ -1709,7 +1709,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -1731,7 +1731,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30486,22 +30486,22 @@ STAGE PLANS:
             TableScan
              alias: alltypesparquet
              filterExpr: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean)
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate: (((cint = 45) and (cfloat = 3.02)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 49) and (cfloat = 3.5))) (type: boolean)
-              Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30539,7 +30539,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30561,7 +30561,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30607,22 +30607,22 @@ STAGE PLANS:
             TableScan
              alias: alltypesparquet
              filterExpr: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
-              Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30659,7 +30659,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30681,7 +30681,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30730,22 +30730,22 @@ STAGE PLANS:
             TableScan
              alias: alltypesparquet
              filterExpr: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean)
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate: (((cint = 45) or (cfloat = 3.02)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 49) or (cfloat = 3.5))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30783,7 +30783,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30805,7 +30805,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30852,24 +30852,24 @@ STAGE PLANS:
             TableScan
              alias: alltypesparquet
              filterExpr: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
-             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
                 keys: cstring1 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   null sort order: a
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                   tag: -1
                   value expressions: _col1 (type: bigint)
                   auto parallelism: false
@@ -30894,7 +30894,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30916,7 +30916,7 @@ STAGE PLANS:
               name default.alltypesparquet
               numFiles 1
               numRows 12288
-              rawDataSize 147456
+              rawDataSize 593751
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
@@ -30936,16 +30936,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col1 (type: bigint), _col0 (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col1 (type: string)
                    null sort order: z
                    sort order: +
-                   Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
                    tag: -1
                    value expressions: _col0 (type: bigint)
                    auto parallelism: false
@@ -30956,13 +30956,13 @@ STAGE PLANS:
            Select Operator
              expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
              outputColumnNames: _col0, _col1
-             Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
                GlobalTableId: 0
 #### A masked pattern was here ####
                NumFilesPerFileSink: 1
-               Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
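The operator-level numbers in these plans change proportionally, not independently: the optimizer keeps its row estimates and rescales Data size from the new table-level base, e.g. 9216/12288 x 593751 = 445313 (truncated) for the 75%-selectivity filters above, and 6144/12288 x 593751 = 296875 after the group-by halving. The same ratio arithmetic explains every rescaled Statistics: line in the files that follow.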
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
index 7472f662b1..e7d14f3709 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
@@ -64,7 +64,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -73,7 +73,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterLongColGreaterLongScalar(col 11:boolean, val 0)), FilterLongColLessLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint), FilterLongColGreaterLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int), FilterLongColLessLongScalar(col 10:boolean, val 0))
                 predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean)
-                Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -82,7 +82,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumnNums: [0, 4, 2, 5, 14, 17, 18]
                       selectExpressions: CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double
-                  Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2)
                     Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out
index 2a13c1053d..a0cbcbdc43 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -77,7 +77,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(6,2), val -5638.15)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(6,2))), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(11,4)), FilterStringColLikeStringScalar(col 6:string, pattern %a))))
                 predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a'))) or (cstring2 <= '10')) (type: boolean)
-                Statistics: Num rows: 9557 Data size: 114684 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9557 Data size: 461790 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -86,13 +86,13 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33]
                       selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double
-                  Statistics: Num rows: 9557 Data size: 114684 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9557 Data size: 461790 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 9557 Data size: 114684 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9557 Data size: 461790 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out
index 5eccb593b5..61782ad73a 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out
@@ -50,7 +50,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -59,7 +59,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)))
                 predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
-                Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -68,13 +68,13 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumnNums: [6, 10, 5, 8, 13, 14, 15, 17, 18]
                       selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1:int)(children: col 1:smallint) -> 13:int, DoubleColSubtractDoubleScalar(col 5:double, val 9763215.5639) -> 14:double, DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleScalar(col 16:double, val 6981.0)(children: DoubleColUnaryMinus(col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleScalar(col 5:double, val -5638.15) -> 18:double
-                  Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
index 5005e050b2..cb88c4b3a0 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
@@ -87,7 +87,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -96,7 +96,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))))
                 predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean)
-                Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -105,7 +105,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 17]
                       selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double
-                  Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6)
                     Group By Vectorization:
@@ -119,7 +119,7 @@ STAGE PLANS:
                     keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                    Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
                       sort order: ++++
@@ -128,7 +128,7 @@ STAGE PLANS:
                          className: VectorReduceSinkMultiKeyOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double)
             Execution mode: vectorized
             Map Vectorization:
@@ -162,7 +162,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                  expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
@@ -171,7 +171,7 @@ STAGE PLANS:
                      native: true
                      projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 21, 23, 26, 27, 29, 31, 9, 34, 38, 43, 48]
                      selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 20:double)(children: DoubleColDivideLongColumn(col 16:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 16:double, IfExprNullCondExpr(col 17:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 20:double) -> 21:double, DoubleColDivideDoubleScalar(col 22:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 22:double) -> 23:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 24:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 24:double) -> 25:double) -> 26:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 27:double, DoubleColUnaryMinus(col 28:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 28:double) -> 29:double, DecimalScalarAddDecimalColumn(val -5638.15, col 30:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 30:decimal(19,0)) -> 31:decimal(22,2), DoubleColDivideDoubleColumn(col 32:double, col 33:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 32:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 33:double) -> 34:double, DoubleColUnaryMinus(col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColDivideDoubleScalar(col 35:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 35:double) -> 36:double) -> 37:double) -> 38:double, DoubleColAddDoubleColumn(col 40:double, col 42:double)(children: DoubleColDivideDoubleScalar(col 39:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 39:double) -> 40:double, DoubleColUnaryMinus(col 41:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 41:double) -> 42:double) -> 43:double, FuncPowerDoubleToDouble(col 47:double)(children: DoubleColDivideLongColumn(col 46:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 45:double)(children: DoubleColDivideLongColumn(col 44:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 44:double) -> 45:double) -> 46:double) -> 47:double) -> 48:double
-                  Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
                     sort order: +++
@@ -179,7 +179,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
         Reducer 3
             Execution mode: vectorized
@@ -198,13 +198,13 @@ STAGE PLANS:
                     native: true
                     projectedOutputColumnNums: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18]
                     selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp
-                Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
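For readers puzzling over the long power(...) expressions in these plans: they are the planner's standard rewrite of variance/stddev aggregates, stddev_samp(x) = power((sum(x*x) - sum(x)*sum(x)/n) / (n - 1), 0.5), with the CASE WHEN (n = 1) THEN null guard preventing a divide by zero; the population variants divide by n instead. Only the Statistics: lines differ in this patch; the expressions themselves are unchanged.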
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
index 507498e552..3f3a861b42 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
@@ -89,7 +89,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -98,7 +98,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4))))
                 predicate: (((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean)
-                Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -107,7 +107,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20]
                       selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double
-                  Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1)
                     Group By Vectorization:
@@ -121,7 +121,7 @@ STAGE PLANS:
                     keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-                    Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                       sort order: +++++
@@ -130,7 +130,7 @@ STAGE PLANS:
                          className: VectorReduceSinkMultiKeyOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint)
             Execution mode: vectorized
             Map Vectorization:
@@ -164,7 +164,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-                Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                  expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
@@ -173,7 +173,7 @@ STAGE PLANS:
                      native: true
                      projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 21, 22, 23, 28, 29, 34, 38, 40, 43, 13, 49, 14]
                      selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 20:double)(children: CastLongToDouble(col 19:tinyint)(children: LongColAddLongColumn(col 18:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 18:tinyint) -> 19:tinyint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 6:double) -> 22:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 23:float, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 26:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double, DoubleColUnaryMinus(col 6:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DecimalColSubtractDecimalScalar(col 37:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 36:tinyint)(children: LongColAddLongColumn(col 35:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 35:tinyint) -> 36:tinyint) -> 37:decimal(3,0)) -> 38:decimal(7,3), DoubleColUnaryMinus(col 39:double)(children: DoubleColUnaryMinus(col 6:double) -> 39:double) -> 40:double, DoubleScalarDivideDoubleColumn(val -26.28, col 42:double)(children: DoubleColUnaryMinus(col 41:double)(children: DoubleColUnaryMinus(col 6:double) -> 41:double) -> 42:double) -> 43:double, DoubleColDivideDoubleColumn(col 47:double, col 48:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 46:double)(children: CastLongToDouble(col 45:tinyint)(children: LongColAddLongColumn(col 44:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 44:tinyint) -> 45:tinyint) -> 46:double) -> 47:double, CastLongToDouble(col 1:tinyint) -> 48:double) -> 49:double
-                  Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
                     sort order: +++++++++++++++++++++
@@ -181,7 +181,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3
             Execution mode: vectorized
@@ -199,19 +199,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
-                Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 40
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                        native: false
-                    Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 40 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -422,7 +422,7 @@ STAGE PLANS:
             TableScan
               alias: alltypesparquet
               filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
-              Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
               Filter Operator
@@ -431,7 +431,7 @@ STAGE PLANS:
                     native: true
                     predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4))))
                 predicate: (((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean)
-                Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string),
UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -440,7 +440,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -454,7 +454,7 @@ STAGE PLANS: keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -463,7 +463,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -497,7 +497,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 
0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 @@ -506,7 +506,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 21, 22, 23, 28, 29, 34, 38, 40, 43, 13, 49, 14] selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 20:double)(children: CastLongToDouble(col 19:tinyint)(children: LongColAddLongColumn(col 18:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 18:tinyint) -> 19:tinyint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 6:double) -> 22:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 23:float, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 26:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double, DoubleColUnaryMinus(col 6:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DecimalColSubtractDecimalScalar(col 37:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 36:tinyint)(children: LongColAddLongColumn(col 35:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 35:tinyint) -> 36:tinyint) -> 37:decimal(3,0)) -> 38:decimal(7,3), DoubleColUnaryMinus(col 39:double)(children: DoubleColUnaryMinus(col 6:double) -> 39:double) -> 40:double, DoubleScalarDivideDoubleColumn(val -26.28, col 42:double)(children: DoubleColUnaryMinus(col 41:double)(children: DoubleColUnaryMinus(col 6:double) -> 41:double) -> 42:double) -> 43:double, DoubleColDivideDoubleColumn(col 47:double, col 48:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 46:double)(children: CastLongToDouble(col 45:tinyint)(children: LongColAddLongColumn(col 44:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 44:tinyint) -> 45:tinyint) -> 46:double) -> 47:double, CastLongToDouble(col 1:tinyint) -> 48:double) -> 49:double - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: 
decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ @@ -514,7 +514,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -532,19 +532,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1920 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1920 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index 85ba6d7224..b540f0d727 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -89,7 +89,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -98,7 +98,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp)), FilterDoubleColLessDoubleColumn(col 5:double, col 14:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -257), FilterDoubleColLessDoubleColumn(col 4:float, col 15:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 15:float))) predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble 
< UDFToDouble(ctinyint))) (type: boolean) - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -107,7 +107,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 4, 6, 10, 5, 17, 22, 4, 23] selectExpressions: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 20:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 23:double - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: @@ -121,7 +121,7 @@ STAGE PLANS: keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -130,7 +130,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -164,7 +164,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 
(type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 @@ -173,7 +173,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 22, 23, 8, 24, 25, 28, 33, 11, 37, 46, 47, 51, 56, 63, 65] selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 21:double)(children: DoubleColDivideLongColumn(col 17:double, col 20:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 15:double) -> 16:double) -> 17:double, IfExprNullCondExpr(col 18:boolean, null, col 19:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 19:bigint) -> 20:bigint) -> 21:double) -> 22:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 23:float, DoubleColUnaryMinus(col 1:float) -> 24:float, DoubleColUnaryMinus(col 8:float) -> 25:float, DoubleColDivideDoubleScalar(col 27:double, val 10.175)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 26:double) -> 27:double) -> 28:double, FuncPowerDoubleToDouble(col 32:double)(children: DoubleColDivideLongColumn(col 31:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double, DoubleColUnaryMinus(col 36:double)(children: DoubleColDivideDoubleScalar(col 35:double, val 10.175)(children: DoubleColUnaryMinus(col 34:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 34:double) -> 35:double) -> 36:double) -> 37:double, DoubleScalarModuloDoubleColumn(val -1.389, col 45:double)(children: FuncPowerDoubleToDouble(col 44:double)(children: DoubleColDivideLongColumn(col 40:double, col 43:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 7:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 38:double) -> 39:double) -> 40:double, IfExprNullCondExpr(col 41:boolean, null, col 42:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 41:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 42:bigint) -> 43:bigint) -> 44:double) -> 45:double) -> 46:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 47:double, DoubleColDivideLongColumn(col 50:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 49:double)(children: DoubleColDivideLongColumn(col 48:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 48:double) -> 49:double) -> 50:double) -> 51:double, DoubleColModuloDoubleScalar(col 55:double, val 10.175)(children: DoubleColDivideLongColumn(col 54:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 53:double)(children: DoubleColDivideLongColumn(col 52:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 52:double) -> 53:double) -> 54:double) -> 55:double) -> 56:double, DoubleColDivideLongColumn(col 59:double, col 62:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 58:double) -> 59:double, IfExprNullCondExpr(col 60:boolean, null, col 61:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 60:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 61:bigint) -> 62:bigint) -> 63:double, DoubleColUnaryMinus(col 64:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 64:double) -> 65:double - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ @@ -181,7 +181,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized @@ -199,13 +199,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index fcd2a0ccc6..9083e9c915 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -85,7 +85,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -94,7 +94,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0))) predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -103,7 +103,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 18, 21] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), 
count(_col5) Group By Vectorization: @@ -117,7 +117,7 @@ STAGE PLANS: keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -126,7 +126,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -149,15 +149,15 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 296875 Basic stats: 
COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Reduce Vectorization: @@ -168,10 +168,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index 2cf5fd061e..3658d8a778 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -62,7 +62,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -71,7 +71,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -80,7 +80,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -94,7 +94,7 @@ STAGE PLANS: keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -103,7 +103,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -137,7 +137,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -146,13 +146,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 17, 26, 36, 6, 37, 39, 47] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 16:double)(children: 
DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double, IfExprNullCondExpr(col 13:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 25:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double) -> 19:double) -> 20:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 24:double) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 34:double, col 35:double)(children: FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 29:double, IfExprNullCondExpr(col 30:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 33:double) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 37:double, DecimalColDivideDecimalScalar(col 38:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 38:decimal(19,0)) -> 39:decimal(28,6), FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 41:double)(children: DoubleColDivideLongColumn(col 40:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 40:double) -> 41:double) -> 42:double, IfExprNullCondExpr(col 43:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 43:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 46:double) -> 47:double - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index fb8524cf1f..757592683b 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ 
ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -70,7 +70,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cbigint > -23L) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -79,7 +79,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float))) predicate: (((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and (cbigint > -23L)) (type: boolean) - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -88,7 +88,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 17, 20, 22, 24, 26, 29] selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 16:bigint, DoubleColUnaryMinus(col 5:double) -> 17:double, DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 18:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 18:double) -> 19:double) -> 20:double, DoubleColDivideDoubleColumn(col 5:double, col 21:double)(children: CastLongToDouble(col 2:int) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColUnaryMinus(col 5:double) -> 23:double) -> 24:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 25:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 25:decimal(19,0)) -> 26:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 28:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 5:double) -> 27:double) -> 28:double) -> 29:double - Statistics: Num rows: 4096 Data 
size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -96,7 +96,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization: @@ -124,13 +124,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 989d3cbf7a..3f4ef30574 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -68,7 +68,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -77,7 +77,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375)), FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 14:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterLongColLessLongScalar(col 2:int, val 359)))) predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and 
(cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 230870 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -86,7 +86,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 4, 3, 0, 5, 15, 18] selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double - Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 230870 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index 9da3200fd8..50a3de0eff 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -73,7 +73,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -82,7 +82,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float), FilterDecimalColNotEqualDecimalScalar(col 14:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 15:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 15:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 16:double), FilterDecimalColGreaterEqualDecimalScalar(col 17:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 17:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) - Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 120943 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), 
cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -91,7 +91,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 4, 2, 18, 21, 22, 25, 4, 26, 27, 30] selectExpressions: CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 1:smallint) -> 19:double, CastLongToDouble(col 1:smallint) -> 20:double) -> 21:double, CastLongToDouble(col 0:tinyint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double, CastLongToDouble(col 0:tinyint) -> 24:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 26:double, CastLongToDouble(col 2:int) -> 27:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: CastLongToDouble(col 2:int) -> 28:double, CastLongToDouble(col 2:int) -> 29:double) -> 30:double - Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 120943 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index 0324a2f434..0ca1e7e305 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -68,7 +68,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToInteger(csmallint) >= cint) or ((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -77,7 +77,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterDoubleColGreaterDoubleScalar(col 5:double, val 79.553)), FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 3:bigint, val -563), FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessEqualDoubleScalar(col 5:double, val -3728.0)))) predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic 
stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -86,7 +86,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [2, 5, 0, 13]
selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
index d40baf56a6..404f17bb3b 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11:boolean), FilterStringColLikeStringScalar(col 6:string, pattern %b%)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), SelectColumnIsNotNull(col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern a)))
predicate: (((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a')) or (cboolean2 is not null and (cstring1 like '%b%'))) (type: boolean)
- Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
outputColumnNames: ctinyint, csmallint, cint
@@ -79,7 +79,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
index eef9f39e26..6eabffb84d 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((ctinyint <> 0Y) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257.0))))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10:boolean, val 0), FilterLongColGreaterEqualLongColumn(col 11:boolean, col 10:boolean)), FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:bigint), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %a), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -257.0)))))
predicate: ((((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257.0)))) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11605 Data size: 560748 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28D / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
@@ -80,13 +80,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 4, 6, 13, 14, 15, 16, 17, 18, 19, 20, 22]
selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1:int)(children: col 1:smallint) -> 13:int, LongColUnaryMinus(col 1:smallint) -> 14:smallint, DoubleColUnaryMinus(col 4:float) -> 15:float, DoubleScalarDivideDoubleColumn(val -26.28, col 4:double)(children: col 4:float) -> 16:double, DoubleColMultiplyDoubleScalar(col 4:float, val 359.0) -> 17:float, LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 18:int, DoubleColUnaryMinus(col 5:double) -> 19:double, LongColSubtractLongScalar(col 0:int, val -75)(children: col 0:tinyint) -> 20:int, LongScalarMultiplyLongColumn(val 762, col 21:int)(children: LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 21:int) -> 22:int
- Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11605 Data size: 560748 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11605 Data size: 560748 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
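
Note: every hunk in these golden files follows one pattern. With basic column stats at NONE, the scan-level estimate for default.alltypesparquet moves from 147456 bytes (12 bytes/row over 12288 rows) to the 593751-byte Parquet rawDataSize, and each downstream operator's Data size is rescaled in proportion while its Num rows estimate is left alone. The snippet below is a back-of-the-envelope check of that proportional (floor) scaling against pairs read off the surrounding hunks; it is an illustration added for this write-up, not code from the patch, and the class name is made up.

public class StatsScalingCheck {
    // Table-level estimates for default.alltypesparquet, taken from the hunks above.
    private static final long TOTAL_ROWS = 12288L;
    private static final long NEW_TOTAL_SIZE = 593751L; // previously 147456, i.e. 12 bytes/row

    // Observed rule: Data size = floor(NEW_TOTAL_SIZE * rows / TOTAL_ROWS);
    // Java's long division truncates toward zero, which is the floor here.
    static long scaledSize(long rows) {
        return NEW_TOTAL_SIZE * rows / TOTAL_ROWS;
    }

    public static void main(String[] args) {
        long[][] observed = {
            {12288L, 593751L}, // TableScan estimate
            {9216L, 445313L},  // Filter in parquet_vectorization_5.q.out
            {11605L, 560748L}, // Filter in parquet_vectorization_6.q.out
        };
        for (long[] pair : observed) {
            long got = scaledSize(pair[0]);
            if (got != pair[1]) {
                throw new AssertionError("rows=" + pair[0] + ": expected " + pair[1] + ", got " + got);
            }
        }
        System.out.println("All sampled Data size values match proportional floor scaling.");
    }
}

The one exception is the size printed after a Limit, which appears to come from a truncated average row width rather than this ratio; see the note after parquet_vectorization_div0.q.out below.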
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
index b84e199470..0f0345193b 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
@@ -76,7 +76,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((ctinyint <> 0Y) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D)))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -85,7 +85,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 14:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))))
predicate: (((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -94,7 +94,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26]
selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
sort order: +++++++++++++++
@@ -102,7 +102,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -130,19 +130,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 25
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -313,7 +313,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((ctinyint <> 0Y) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D)))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -322,7 +322,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 14:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))))
predicate: (((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -331,7 +331,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26]
selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
sort order: +++++++++++++++
@@ -339,7 +339,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -367,19 +367,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 25
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
index 31f1d4943e..357a321c27 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D)) or (cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -81,7 +81,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0)))
predicate: ((cboolean1 is not null and (cdouble = 988888.0D)) or (cfloat < -6432.0) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -90,7 +90,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
sort order: ++++++++++++++
@@ -98,7 +98,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -126,19 +126,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -296,7 +296,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D)) or (cfloat < -6432.0) or (cboolean1 is not null and (cdouble = 988888.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -305,7 +305,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0)))
predicate: ((cboolean1 is not null and (cdouble = 988888.0D)) or (cfloat < -6432.0) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -314,7 +314,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
sort order: ++++++++++++++
@@ -322,7 +322,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -350,19 +350,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
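
Note: throughout parquet_vectorization_7.q.out and parquet_vectorization_8.q.out only the Data size field differs between the two sides of each hunk. The Num rows estimates (12288 at the scan, 5461 after the conjunctive filter, and an unreduced 12288 where the predicate is judged non-selective) are identical before and after, so the row-count heuristics used under Column stats: NONE appear untouched; only the byte total they are applied to has changed.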
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
index 2cf5fd061e..3658d8a778 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a)))
predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean)
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -80,7 +80,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6, 5, 8, 13]
selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
Group By Vectorization:
@@ -94,7 +94,7 @@ STAGE PLANS:
keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
sort order: +++
@@ -103,7 +103,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
Execution mode: vectorized
Map Vectorization:
@@ -137,7 +137,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -146,13 +146,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 17, 26, 36, 6, 37, 39, 47]
selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double, IfExprNullCondExpr(col 13:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 25:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double) -> 19:double) -> 20:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 24:double) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 34:double, col 35:double)(children: FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 29:double, IfExprNullCondExpr(col 30:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 33:double) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 37:double, DecimalColDivideDecimalScalar(col 38:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 38:decimal(19,0)) -> 39:decimal(28,6), FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 41:double)(children: DoubleColDivideLongColumn(col 40:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 40:double) -> 41:double) -> 42:double, IfExprNullCondExpr(col 43:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 43:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 46:double) -> 47:double
- Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
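
Note: the aggregation stages in parquet_vectorization_9.q.out rescale the same way. The mergepartial Group By halves the filtered estimate of 4096 rows to 2048, and the new size follows the proportional rule, floor(593751 * 2048 / 12288) = 98958, where the old value was 2048 * 12 = 24576.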
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
index 868b954f60..8f40278318 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
@@ -25,7 +25,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -36,19 +36,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13]
selectExpressions: DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -210,7 +210,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cbigint > 0L) and (cbigint < 100000000L)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -219,7 +219,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000))
predicate: ((cbigint < 100000000L) and (cbigint > 0L)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
outputColumnNames: _col0, _col1, _col2
@@ -228,7 +228,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13, 16, 19]
selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 18:decimal(19,0))(children: CastLongToDecimal(col 17:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 17:bigint) -> 18:decimal(19,0)) -> 19:decimal(22,21)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type: double)
sort order: ++
@@ -236,7 +236,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: decimal(22,21))
Execution mode: vectorized
@@ -265,19 +265,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -431,7 +431,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cdouble >= -500.0D) and (cdouble < -199.0D)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -440,7 +440,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0))
predicate: ((cdouble < -199.0D) and (cdouble >= -500.0D)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cdouble + 200.0D) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0D)) (type: double), ((cdouble + 200.0D) / (cdouble + 200.0D)) (type: double), (3.0D / (cdouble + 200.0D)) (type: double), (1.2D / (cdouble + 200.0D)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col4, _col5
@@ -449,7 +449,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13, 16, 19, 21, 23]
selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 17:double, col 18:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 17:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 18:double) -> 19:double, DoubleScalarDivideDoubleColumn(val 3.0, col 20:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 20:double) -> 21:double, DoubleScalarDivideDoubleColumn(val 1.2, col 22:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 22:double) -> 23:double
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: double)
sort order: ++
@@ -457,7 +457,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
Execution mode: vectorized
@@ -486,19 +486,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 1, 3, 4]
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 4800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
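
Note: the post-Limit sizes in parquet_vectorization_div0.q.out are the one place the arithmetic differs. They come from the truncated average row width, floor(593751 / 12288) = 48 bytes (previously 147456 / 12288 = 12), multiplied by the limit, so Limit 100 now reports 100 * 48 = 4800 where it used to report 100 * 12 = 1200.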
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
index 8f03384a4c..b0abc06f04 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -25,20 +25,20 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cdouble (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 7
- Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 336 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 336 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -106,7 +106,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -115,7 +115,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
outputColumnNames: _col0, _col1, _col2
@@ -123,7 +123,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5, 1]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
@@ -131,7 +131,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col2 (type: smallint)
Execution mode: vectorized
@@ -160,19 +160,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -241,7 +241,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -252,7 +252,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 13]
selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1), count(_col1)
Group By Vectorization:
@@ -266,7 +266,7 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
@@ -275,7 +275,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: vectorized
@@ -310,7 +310,7 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
outputColumnNames: _col0, _col1
@@ -319,19 +319,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 3]
selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -400,7 +400,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -410,7 +410,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -422,7 +422,7 @@ STAGE PLANS:
keys: ctinyint (type: tinyint)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
@@ -431,7 +431,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Execution mode: vectorized
Map Vectorization:
@@ -463,19 +463,19 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -544,7 +544,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -554,7 +554,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -566,7 +566,7 @@ STAGE PLANS:
keys: ctinyint (type: tinyint), cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
@@ -575,7 +575,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -606,7 +606,7 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
@@ -620,19 +620,19 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3072 Data size: 148437 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -735,7 +735,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -744,7 +744,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(ctinyint)
Group By Vectorization:
@@ -758,7 +758,7 @@ STAGE PLANS:
keys: cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
@@ -767,7 +767,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -801,7 +801,7 @@ STAGE PLANS:
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: bigint), _col0 (type: double)
sort order: ++
@@ -809,7 +809,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Reducer 3
Execution mode: vectorized
@@ -827,19 +827,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0]
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
index 6034485048..51d2386e39 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
@@ -25,21 +25,21 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cdouble (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 2
Offset of rows: 3
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 96 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -102,7 +102,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -111,7 +111,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
outputColumnNames: _col0, _col1, _col2
@@ -119,7 +119,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5, 1]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
@@ -127,7 +127,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: smallint)
Execution mode: vectorized
@@ -156,20 +156,20 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 3
Limit Vectorization:
className: VectorLimitOperator
native: true
Offset of rows: 10
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 144 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 144 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
index 00f0e060f4..c834a3f565 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
@@ -27,14 +27,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint)
outputColumnNames: cbigint
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cbigint), count(cbigint)
mode: hash