diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
index e0018a5bc0..0534a285eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
@@ -69,30 +69,15 @@
     }
   }
 
-  private SerDeStats stats;
   private ObjectInspector objInspector;
-
-  private enum LAST_OPERATION {
-    SERIALIZE,
-    DESERIALIZE,
-    UNKNOWN
-  }
-
-  private LAST_OPERATION status;
-  private long serializedSize;
-  private long deserializedSize;
-
   private ParquetHiveRecord parquetRow;
 
   public ParquetHiveSerDe() {
     parquetRow = new ParquetHiveRecord();
-    stats = new SerDeStats();
   }
 
   @Override
   public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
-
-    final TypeInfo rowTypeInfo;
     final List<String> columnNames;
     final List<TypeInfo> columnTypes;
     // Get column names and sort order
@@ -128,19 +113,11 @@ public final void initialize(final Configuration conf, final Properties tbl) thr
       }
     }
     this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo);
-
-    // Stats part
-    serializedSize = 0;
-    deserializedSize = 0;
-    status = LAST_OPERATION.UNKNOWN;
   }
 
   @Override
   public Object deserialize(final Writable blob) throws SerDeException {
-    status = LAST_OPERATION.DESERIALIZE;
-    deserializedSize = 0;
     if (blob instanceof ArrayWritable) {
-      deserializedSize = ((ArrayWritable) blob).get().length;
       return blob;
     } else {
       return null;
@@ -163,23 +140,21 @@ public Writable serialize(final Object obj, final ObjectInspector objInspector)
     if (!objInspector.getCategory().equals(Category.STRUCT)) {
       throw new SerDeException("Cannot serialize " + objInspector.getCategory() + ". Can only serialize a struct");
    }
-    serializedSize = ((StructObjectInspector)objInspector).getAllStructFieldRefs().size();
-    status = LAST_OPERATION.SERIALIZE;
+
     parquetRow.value = obj;
     parquetRow.inspector= (StructObjectInspector)objInspector;
     return parquetRow;
   }
 
+  /**
+   * Returns null for the Parquet format; stats are collected in
+   * ParquetRecordWriterWrapper when the writer is closed.
+   *
+   * @return null
+   */
   @Override
   public SerDeStats getSerDeStats() {
-    // must be different
-    assert (status != LAST_OPERATION.UNKNOWN);
-    if (status == LAST_OPERATION.SERIALIZE) {
-      stats.setRawDataSize(serializedSize);
-    } else {
-      stats.setRawDataSize(deserializedSize);
-    }
-    return stats;
+    return null;
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
index db8a33247f..eee21cd9b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
@@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
@@ -32,21 +33,25 @@
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
 import org.apache.hadoop.util.Progressable;
-
+import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetOutputFormat;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.util.ContextUtil;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
 
-public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, ParquetHiveRecord>,
-    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter {
+public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, ParquetHiveRecord>,
+    StatsProvidingRecordWriter, org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordWriterWrapper.class);
 
   private final org.apache.hadoop.mapreduce.RecordWriter<Void, ParquetHiveRecord> realWriter;
   private final TaskAttemptContext taskContext;
-
+  private final JobConf jobConf;
+  private final Path file;
+  private SerDeStats stats;
+
   public ParquetRecordWriterWrapper(
      final OutputFormat<Void, ParquetHiveRecord> realOutputFormat,
      final JobConf jobConf,
@@ -66,8 +71,12 @@ public ParquetRecordWriterWrapper(
 
       LOG.info("creating real writer to write at " + name);
 
+      this.jobConf = jobConf;
+      this.file = new Path(name);
+
       realWriter =
-          ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
+          ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, this.file);
+
       LOG.info("real writer: " + realWriter);
     } catch (final InterruptedException e) {
@@ -128,6 +137,21 @@ public void close(final Reporter reporter) throws IOException {
     } catch (final InterruptedException e) {
       throw new IOException(e);
     }
+
+    // Collect file stats from the footer once the writer has been closed.
+    try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(this.file, this.jobConf))) {
+      long totalSize = 0;
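+      // Each BlockMetaData in the footer describes one row group; getTotalByteSize()
+      // is that row group's total uncompressed size, so summing across row groups
+      // approximates the raw data size of the file.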
+      for (BlockMetaData block : reader.getFooter().getBlocks()) {
+        totalSize += block.getTotalByteSize();
+      }
+
+      stats = new SerDeStats();
+      stats.setRowCount(reader.getRecordCount());
+      stats.setRawDataSize(totalSize);
+    } catch (IOException e) {
+      // Stats collection is best-effort; do not fail the close if the footer cannot be read.
+      LOG.warn("Unable to read Parquet footer to collect stats for " + file, e);
+    }
   }
 
   @Override
@@ -149,4 +173,9 @@ public void write(final Writable w) throws IOException {
     write(null, (ParquetHiveRecord) w);
   }
 
+  @Override
+  public SerDeStats getStats() {
+    return stats;
+  }
+
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
index 06f27b5091..097dbebf6f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
@@ -115,7 +115,8 @@ public void testParquetHiveSerDeComplexTypes() throws Throwable {
     assertEquals(wb[0], boi.getStructFieldData(awb, b));
   }
 
-  private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {
+  private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t)
+      throws SerDeException {
 
     // Get the row structure
     final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
@@ -123,13 +124,14 @@ private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, fin
     // Deserialize
     final Object row = serDe.deserialize(t);
     assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
-    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
     assertEquals("deserialization gives the wrong object", t, row);
 
     // Serialize
     final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
-    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), ((ArrayWritable)serializedArr.getObject()).get().length);
-    assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, (ArrayWritable)serializedArr.getObject()));
+    assertTrue("serialized object should be equal to starting object",
+        arrayWritableEquals(t, (ArrayWritable) serializedArr.getObject()));
+
+    assertNull("stats should not be collected during serialization and deserialization",
+        serDe.getSerDeStats());
   }
 
   private Properties createProperties() {
diff --git a/ql/src/test/queries/clientpositive/parquet_stats.q b/ql/src/test/queries/clientpositive/parquet_stats.q
new file mode 100644
index 0000000000..92eaadb293
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_stats.q
@@ -0,0 +1,12 @@
+
+DROP TABLE IF EXISTS parquet_stats;
+
+CREATE TABLE parquet_stats (
+  id int,
+  str string
+) STORED AS PARQUET;
+
+SET hive.stats.autogather=true;
+INSERT INTO parquet_stats VALUES (0, 'this is string 0'), (1, 'string 1');
+DESC FORMATTED parquet_stats;
+
diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 57a1ea7cfa..fd7f795414 100644
--- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -3791,7 +3791,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_date - Statistics: Num rows: 137 Data size: 8357 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 13861 Basic stats:
COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -3801,7 +3801,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 8357 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -4323,7 +4323,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 68213 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true Select Operator @@ -4558,7 +4558,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 68213 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true Select Operator @@ -4568,7 +4568,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 68213 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -4576,7 +4576,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 68213 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap @@ -4746,7 +4746,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 6165 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4756,7 +4756,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 6165 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/nested_column_pruning.q.out b/ql/src/test/results/clientpositive/nested_column_pruning.q.out index 02a271641b..d806cc58c8 100644 --- a/ql/src/test/results/clientpositive/nested_column_pruning.q.out +++ b/ql/src/test/results/clientpositive/nested_column_pruning.q.out @@ -135,14 +135,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: nested_tbl_1_n1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: 
_col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -178,14 +178,14 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f1 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -221,14 +221,14 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f1, s1.f2 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f1 (type: boolean), s1.f2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -264,14 +264,14 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -307,14 +307,14 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f3.f5 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3.f5 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data 
size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -350,14 +350,14 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f3.f4, s2.f8.f9 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3.f4 (type: int), s2.f8.f9 (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -394,17 +394,17 @@ STAGE PLANS: alias: nested_tbl_1_n1 filterExpr: (s1.f1 = false) (type: boolean) Pruned Column Paths: s1.f2, s1.f1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s1.f1 = false) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -441,17 +441,17 @@ STAGE PLANS: alias: nested_tbl_1_n1 filterExpr: (s1.f3.f4 = 4) (type: boolean) Pruned Column Paths: s1.f3.f5, s1.f3.f4 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s1.f3.f4 = 4) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3.f5 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -488,17 +488,17 @@ STAGE PLANS: alias: 
nested_tbl_1_n1 filterExpr: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and s2.f8.f11['key1']) (type: boolean) Pruned Column Paths: s1.f2, s2.f8 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((s1.f2 = 'foo') and (size(s2.f8.f10) > 1) and s2.f8.f11['key1']) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s2.f8 (type: struct,f11:map>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -538,32 +538,32 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s2.f8.f10 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Lateral View Forward - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s3 (type: struct>>) outputColumnNames: s3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col3, _col10 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE Lateral View Forward - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: int) outputColumnNames: _col10 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col10, _col11 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: int), _col11 (type: struct) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -571,20 +571,20 @@ STAGE PLANS: Select Operator expressions: _col3.f12 (type: array>) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE UDTF Operator - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col10, _col11 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: int), _col11 (type: struct) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -592,29 +592,29 @@ STAGE PLANS: Select Operator expressions: s2.f8.f10 (type: array) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE UDTF Operator - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View Join Operator outputColumnNames: _col3, _col10 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE Lateral View Forward - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: int) outputColumnNames: _col10 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator outputColumnNames: _col10, _col11 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: int), _col11 (type: struct) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -622,20 +622,20 @@ STAGE PLANS: Select Operator expressions: _col3.f12 (type: array>) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE UDTF Operator - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 2240 Basic stats: COMPLETE Column stats: NONE function name: explode Lateral View 
Join Operator outputColumnNames: _col10, _col11 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col10 (type: int), _col11 (type: struct) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 4480 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -678,14 +678,14 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s2.f8.f10, s1.f3.f4 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (s2.f8.f10[1] pmod s1.f3.f4) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -721,22 +721,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f3.f5, s1.f3.f4 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3.f5 (type: double), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -744,10 +744,10 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -783,22 +783,22 @@ STAGE PLANS: 
TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -806,10 +806,10 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -846,22 +846,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -869,7 +869,7 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -884,16 +884,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct) sort order: + - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col1 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -935,36 +935,36 @@ STAGE PLANS: TableScan alias: t1 filterExpr: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0.f3.f4 (type: int) sort order: + Map-reduce partition columns: _col0.f3.f4 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 filterExpr: ((s2.f8.f9 = false) and s1.f6 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((s2.f8.f9 = false) and s1.f6 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>), s2 (type: struct,f11:map>>) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0.f6 (type: int) sort order: + Map-reduce partition columns: _col0.f6 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1125 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct,f11:map>>) Reduce Operator Tree: Join Operator @@ -974,14 +974,14 @@ STAGE PLANS: 0 _col0.f3.f4 (type: int) 1 _col0.f6 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct,f11:map>) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1031,36 +1031,36 @@ STAGE PLANS: TableScan alias: t1 filterExpr: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0.f3.f4 (type: int) sort order: + Map-reduce partition columns: _col0.f3.f4 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 filterExpr: (s2.f8.f9 and s1.f6 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s1.f6 is not null and s2.f8.f9) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>), s2 (type: struct,f11:map>>) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0.f6 (type: int) sort order: + Map-reduce partition columns: _col0.f6 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct,f11:map>>) Reduce Operator Tree: Join Operator @@ -1070,14 +1070,14 @@ STAGE PLANS: 0 _col0.f3.f4 (type: int) 1 _col0.f6 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0.f3.f5 (type: double), _col2.f8 (type: struct,f11:map>) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1123,42 +1123,42 @@ STAGE PLANS: TableScan alias: t1 filterExpr: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), true (type: boolean) sort order: ++ Map-reduce partition columns: _col1 (type: int), true (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 filterExpr: (s2.f8.f9 and s1.f6 is not null and s2.f8.f9 is not null) (type: boolean) Pruned Column Paths: s1.f6, s2.f8.f9 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s1.f6 is not null and s2.f8.f9 and s2.f8.f9 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1.f6 (type: int), (s2.f8.f9 = true) (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: boolean) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: boolean) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1167,14 +1167,14 @@ STAGE PLANS: 0 _col1 (type: int), true (type: boolean) 1 _col0 (type: int), _col1 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0.f3.f5 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1218,25 +1218,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s2 (type: struct,f11:map>>) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f11:map>>) Reduce Operator Tree: Join Operator @@ -1247,14 +1247,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 residual filter predicates: {(_col0.f1 <> _col1.f8.f9)} - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2241 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0.f3.f5 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2241 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2241 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1299,37 +1299,37 @@ STAGE PLANS: TableScan alias: t1 filterExpr: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s1.f3.f4 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f6:int>) TableScan alias: t2 filterExpr: s1.f6 is not null (type: boolean) Pruned Column Paths: s1.f6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s1.f6 is not null 
(type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s2 (type: struct,f11:map>>), s1.f6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct,f11:map>>) Reduce Operator Tree: Join Operator @@ -1340,14 +1340,14 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col2 residual filter predicates: {(_col0.f1 <> _col2.f8.f9)} - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0.f3.f5 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1430,22 +1430,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s3.f12, s1.f6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s3.f12[0].f14 (type: int), s1.f6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1453,14 +1453,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1506,22 +1506,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s4['key1'].f15 (type: int), s1.f6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1529,14 +1529,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1582,22 +1582,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s5.f16, s1.f6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s5.f16[0].f18.f19 (type: int), s1.f6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1605,14 +1605,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1658,22 +1658,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f6, s5.f16 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>), s5 (type: struct>>>) outputColumnNames: s1, s5 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(s1.f6) keys: s5.f16.f18.f19 (type: array) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: array) sort order: + Map-reduce partition columns: _col0 (type: array) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1681,14 +1681,14 @@ STAGE PLANS: keys: KEY._col0 (type: array) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1734,22 +1734,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s6['key1'].f20[0].f21.f22 (type: int), s1.f6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1757,14 +1757,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1810,22 +1810,22 @@ STAGE PLANS: TableScan alias: nested_tbl_1_n1 Pruned Column Paths: s1.f6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s1 (type: struct,f6:int>), s6 (type: map>>>>) outputColumnNames: s1, s6 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(s1.f6) keys: s6['key1'].f20.f21.f22 (type: array) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: array) sort order: + Map-reduce partition columns: _col0 (type: array) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1833,14 +1833,14 @@ STAGE PLANS: keys: KEY._col0 (type: array) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_analyze.q.out 
b/ql/src/test/results/clientpositive/parquet_analyze.q.out index e746621afa..bdc09f5d3a 100644
--- a/ql/src/test/results/clientpositive/parquet_analyze.q.out
+++ b/ql/src/test/results/clientpositive/parquet_analyze.q.out
@@ -93,7 +93,7 @@ Table Parameters:
 bucketing_version 2
 numFiles 1
 numRows 100
- rawDataSize 700
+ rawDataSize 5936
 totalSize 6692
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out
index 29e296bb13..d0fa7347ed 100644
--- a/ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out
@@ -102,7 +102,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: parquet_complex_types
- Statistics: Num rows: 1023 Data size: 5115 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1023 Data size: 62431 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -113,19 +113,19 @@ STAGE PLANS:
 native: true
 projectedOutputColumnNums: [2, 6, 7, 8, 4]
 selectExpressions: ListIndexColScalar(col 2:array, col 0:int) -> 6:int, ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColColumn(col 2:array, col 4:int) -> 8:int
- Statistics: Num rows: 1023 Data size: 5115 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1023 Data size: 62431 Basic stats: COMPLETE Column stats: NONE
 Limit
 Number of rows: 10
 Limit Vectorization:
 className: VectorLimitOperator
 native: true
- Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
- Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -185,7 +185,7 @@ STAGE PLANS:
 TableScan
 alias: parquet_complex_types
 filterExpr: (l1[0] > 1000) (type: boolean)
- Statistics: Num rows: 1023 Data size: 5115 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1023 Data size: 62431 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -194,7 +194,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int)
 predicate: (l1[0] > 1000) (type: boolean)
- Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: l1[1] (type: int), l1[0] (type: int)
 outputColumnNames: _col0, _col1
@@ -203,7 +203,7 @@ STAGE PLANS:
 native: true
 projectedOutputColumnNums: [6, 7]
 selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int
- Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: sum(_col1)
 Group By Vectorization:
@@ -217,7 +217,7 @@ STAGE PLANS:
 keys: _col0 (type: int)
 mode: hash
outputColumnNames: _col0, _col1 - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -227,7 +227,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: vectorized @@ -250,11 +250,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -276,7 +276,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -297,13 +297,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -375,7 +375,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_complex_types - Statistics: Num rows: 1024 Data size: 5120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 62494 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -386,19 +386,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 6, 7, 8, 4] selectExpressions: ListIndexColScalar(col 2:array, col 0:int) -> 6:int, ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColColumn(col 2:array, col 4:int) -> 8:int - Statistics: Num rows: 1024 Data size: 5120 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 62494 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -458,7 +458,7 @@ STAGE PLANS: TableScan alias: parquet_complex_types filterExpr: (l1[0] > 1000) (type: boolean) - Statistics: Num rows: 1024 Data size: 5120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 62494 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -467,7 +467,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int) predicate: (l1[0] > 1000) (type: boolean) - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l1[1] (type: int), l1[0] (type: int) outputColumnNames: _col0, _col1 @@ -476,7 +476,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 7] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -490,7 +490,7 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -500,7 +500,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20810 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: vectorized @@ -523,11 +523,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column 
stats: NONE File Output Operator compressed: false table: @@ -549,7 +549,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -570,13 +570,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10374 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -648,7 +648,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_complex_types - Statistics: Num rows: 1025 Data size: 5125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1025 Data size: 62561 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -659,19 +659,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 6, 7, 8, 4] selectExpressions: ListIndexColScalar(col 2:array, col 0:int) -> 6:int, ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColColumn(col 2:array, col 4:int) -> 8:int - Statistics: Num rows: 1025 Data size: 5125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1025 Data size: 62561 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -731,7 +731,7 @@ STAGE PLANS: TableScan alias: parquet_complex_types filterExpr: (l1[0] > 1000) (type: boolean) - Statistics: Num rows: 1025 Data size: 5125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1025 Data size: 62561 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -740,7 +740,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int) predicate: (l1[0] > 1000) (type: boolean) - Statistics: Num rows: 341 Data size: 1705 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20812 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l1[1] (type: int), l1[0] (type: int) outputColumnNames: _col0, _col1 @@ -749,7 +749,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 7] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20812 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -763,7 +763,7 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -773,7 +773,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 341 Data size: 20812 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: vectorized @@ -796,11 +796,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10375 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10375 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -822,7 +822,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10375 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -843,13 +843,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 10375 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 610 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_join.q.out b/ql/src/test/results/clientpositive/parquet_join.q.out index f674411928..97ddcc9171 100644 --- a/ql/src/test/results/clientpositive/parquet_join.q.out +++ b/ql/src/test/results/clientpositive/parquet_join.q.out @@ -76,35 +76,35 @@ STAGE PLANS: TableScan alias: p1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE TableScan alias: p2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), myvalue (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -114,14 +114,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -166,14 +166,14 @@ STAGE PLANS: TableScan alias: p1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column 
stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: int) @@ -185,14 +185,14 @@ STAGE PLANS: TableScan alias: p2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), myvalue (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -200,14 +200,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -285,14 +285,14 @@ STAGE PLANS: TableScan alias: p2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 diff --git a/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out index 49cf3a2242..1ee412b672 100644 --- a/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out +++ b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_map_type - Statistics: Num rows: 1023 Data size: 7161 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023 Data size: 49969 Basic stats: 
COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -125,19 +125,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2, 3, 8, 9, 10, 11, 12, 13] selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k2) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 456) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map, key: 123.123) -> 10:double, VectorUDFMapIndexStringCol(col 1:map, key: col 4:string) -> 11:string, VectorUDFMapIndexLongCol(col 2:map, key: col 5:int) -> 12:int, VectorUDFMapIndexDoubleCol(col 3:map, key: col 6:double) -> 13:double - Statistics: Num rows: 1023 Data size: 7161 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023 Data size: 49969 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 480 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -201,7 +201,7 @@ STAGE PLANS: TableScan alias: parquet_map_type filterExpr: (stringmap['k1'] like 'v100%') (type: boolean) - Statistics: Num rows: 1023 Data size: 7161 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023 Data size: 49969 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -210,7 +210,7 @@ STAGE PLANS: native: true predicateExpression: FilterStringColLikeStringScalar(col 8:string, pattern v100%)(children: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string) predicate: (stringmap['k1'] like 'v100%') (type: boolean) - Statistics: Num rows: 511 Data size: 3577 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 24960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: stringmap['k1'] (type: string), intmap[123] (type: int), doublemap[123.123] (type: double) outputColumnNames: _col0, _col1, _col2 @@ -219,7 +219,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 9, 10] selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map, key: 123.123) -> 10:double - Statistics: Num rows: 511 Data size: 3577 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 24960 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), sum(_col2) Group By Vectorization: @@ -233,7 +233,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 511 Data size: 3577 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 24960 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -243,7 +243,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 511 Data size: 3577 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 24960 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: double) Execution mode: vectorized @@ -266,11 +266,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 255 Data size: 1785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 255 Data size: 12455 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col2 (type: double), _col0 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 255 Data size: 1785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 255 Data size: 12455 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -292,7 +292,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 255 Data size: 1785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 255 Data size: 12455 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint), _col1 (type: double) Execution mode: vectorized @@ -313,13 +313,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 255 Data size: 1785 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 255 Data size: 12455 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 480 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 480 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_no_row_serde.q.out b/ql/src/test/results/clientpositive/parquet_no_row_serde.q.out index 02d7586838..e4df4a5e19 100644 --- a/ql/src/test/results/clientpositive/parquet_no_row_serde.q.out +++ b/ql/src/test/results/clientpositive/parquet_no_row_serde.q.out @@ -139,15 +139,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_parquet - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: val (type: decimal(10,0)), round(val, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data 
size: 60 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: decimal(11,0))
 Map Vectorization:
 enabled: false
@@ -161,10 +161,10 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: decimal(10,0)), VALUE._col0 (type: decimal(11,0))
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_stats.q.out b/ql/src/test/results/clientpositive/parquet_stats.q.out
new file mode 100644
index 0000000000..3bc6554bfd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_stats.q.out
@@ -0,0 +1,63 @@
+PREHOOK: query: DROP TABLE if exists parquet_stats
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists parquet_stats
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_stats (
+ id int,
+ str string
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_stats
+POSTHOOK: query: CREATE TABLE parquet_stats (
+ id int,
+ str string
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_stats
+PREHOOK: query: INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@parquet_stats
+POSTHOOK: query: INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@parquet_stats
+POSTHOOK: Lineage: parquet_stats.id SCRIPT []
+POSTHOOK: Lineage: parquet_stats.str SCRIPT []
+PREHOOK: query: DESC FORMATTED parquet_stats
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_stats
+POSTHOOK: query: DESC FORMATTED parquet_stats
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_stats
+# col_name data_type comment
+id int
+str string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"str\":\"true\"}}
+ bucketing_version 2
+ numFiles 1
+ numRows 2
+ rawDataSize 146
+ totalSize 431
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
index 6459e29e81..7ba43cbda7 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
@@ -80,7 +80,7 @@ STAGE PLANS:
 TableScan
 alias:
alltypesparquet filterExpr: (((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -90,7 +90,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(11,4)))) predicate: (((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -99,7 +99,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -113,7 +113,7 @@ STAGE PLANS: keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 
1322128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -123,7 +123,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -151,11 +151,11 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -178,7 +178,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -204,13 +204,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), 
KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -412,7 +412,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -421,7 +421,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(11,4)))) predicate: (((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5, _col6, _col7, _col8 @@ -430,7 +430,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -444,7 +444,7 @@ STAGE PLANS: keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -454,7 +454,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -476,11 +476,11 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / 
UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -502,7 +502,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -522,13 +522,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index 1eab96280b..176e9d06ee 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -80,7 +80,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan 
Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -90,7 +90,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp)), FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -257), FilterDoubleColLessDoubleColumn(col 4:float, col 13:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float))) predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -99,7 +99,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: @@ -113,7 +113,7 @@ STAGE PLANS: keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -123,7 +123,7 @@ STAGE PLANS: native: false 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -151,11 +151,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -178,7 +178,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: 
double), _col20 (type: double), _col21 (type: double) Execution mode: vectorized Map Vectorization: @@ -204,10 +204,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 2d306cf6c1..e6f322bfda 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -76,7 +76,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -86,7 +86,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0))) predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), 
ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -95,7 +95,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: @@ -109,7 +109,7 @@ STAGE PLANS: keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -119,7 +119,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -146,11 +146,11 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -173,7 +173,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Execution mode: vectorized Map Vectorization: @@ -198,10 +198,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out index fa317902b4..5eac26ac96 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out @@ -53,7 +53,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -63,7 +63,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -72,7 +72,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -86,7 +86,7 @@ STAGE PLANS: keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -96,7 +96,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: 
double) Execution mode: vectorized Map Vectorization: @@ -124,14 +124,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out index 9fa2d72ff8..efacbdb35a 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out @@ -61,7 +61,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cbigint > -23L) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -71,7 +71,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), 
FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float))) predicate: (((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and (cbigint > -23L)) (type: boolean) - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -80,7 +80,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 14, 17, 19, 20, 22, 18] selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 16:bigint, DoubleColUnaryMinus(col 5:double) -> 14:double, DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 17:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double) -> 17:double, DoubleColDivideDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 18:double) -> 20:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 21:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 23:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 18:double) -> 23:double) -> 18:double - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -89,7 +89,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: 
vectorized Map Vectorization: @@ -115,10 +115,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out index cb984bc47f..e5ea2f79ad 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out @@ -59,7 +59,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -69,7 +69,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375)), FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 13:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterLongColLessLongScalar(col 2:int, val 359)))) predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 2313966 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * 
UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -78,7 +78,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double - Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 2313966 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out index c7672a3da9..ff96c40dba 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out @@ -64,7 +64,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -74,7 +74,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float), FilterDecimalColNotEqualDecimalScalar(col 14:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 13:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 13:double), FilterDecimalColGreaterEqualDecimalScalar(col 15:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) - Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 1212193 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), 
(UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -83,7 +83,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double - Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 1212193 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out index 998fdb3576..0deece4a19 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out @@ -59,7 +59,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToInteger(csmallint) >= cint) or ((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -69,7 +69,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterDoubleColGreaterDoubleScalar(col 5:double, val 79.553)), FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 3:bigint, val -563), FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessEqualDoubleScalar(col 5:double, val -3728.0)))) predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or 
(UDFToInteger(csmallint) >= cint)) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -78,7 +78,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 5, 0, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out index 4e490ad871..813c0fc3bc 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out @@ -53,7 +53,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -63,7 +63,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11:boolean), FilterStringColLikeStringScalar(col 6:string, pattern %b%)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), SelectColumnIsNotNull(col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern a))) predicate: (((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a')) or (cboolean2 is not null and (cstring1 like '%b%'))) (type: boolean) - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint @@ -71,7 +71,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_6.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_6.q.out index fa649b2378..9891accc4a 100644 --- 
a/ql/src/test/results/clientpositive/parquet_vectorization_6.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_6.q.out @@ -55,7 +55,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((ctinyint <> 0Y) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -65,7 +65,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10:boolean, val 0), FilterLongColGreaterEqualLongColumn(col 11:boolean, col 10:boolean)), FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:bigint), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %a), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -257.0))))) predicate: ((((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257)))) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11605 Data size: 5620255 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28D / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -74,13 +74,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 4, 6, 13, 14, 15, 16, 17, 18, 19, 20, 22] selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1:int)(children: col 1:smallint) -> 13:int, LongColUnaryMinus(col 1:smallint) -> 14:smallint, DoubleColUnaryMinus(col 4:float) -> 15:float, DoubleScalarDivideDoubleColumn(val -26.28, col 4:double)(children: col 4:float) -> 16:double, DoubleColMultiplyDoubleScalar(col 4:float, val 359.0) -> 17:float, LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 18:int, DoubleColUnaryMinus(col 5:double) -> 19:double, LongColSubtractLongScalar(col 0:int, val -75)(children: col 0:tinyint) -> 20:int, LongScalarMultiplyLongColumn(val 762, col 21:int)(children: LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 21:int) -> 22:int - Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11605 Data size: 5620255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 11605 Data size: 5620255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out index 011dbb00c2..8a96b77fc0 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out @@ -67,7 +67,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((ctinyint <> 0Y) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -77,7 +77,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0)))) predicate: (((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -86,7 +86,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 14:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 15:int, 
LongColUnaryMinus(col 1:smallint) -> 16:smallint, LongColUnaryMinus(col 0:tinyint) -> 17:tinyint, LongColAddLongScalar(col 18:int, val 17)(children: col 18:tinyint) -> 19:int, LongColMultiplyLongColumn(col 3:bigint, col 18:bigint)(children: col 18:smallint) -> 20:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColUnaryMinus(col 0:tinyint) -> 21:tinyint, LongColModuloLongColumn(col 22:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 22:tinyint) -> 23:tinyint - Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -95,7 +95,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -121,13 +121,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 25 - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -289,7 +289,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((ctinyint <> 0Y) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -298,7 +298,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0)))) predicate: (((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -307,7 +307,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 14:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 15:int, LongColUnaryMinus(col 1:smallint) -> 16:smallint, LongColUnaryMinus(col 0:tinyint) -> 17:tinyint, LongColAddLongScalar(col 18:int, val 17)(children: col 18:tinyint) -> 19:int, LongColMultiplyLongColumn(col 3:bigint, col 18:bigint)(children: col 18:smallint) -> 20:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColUnaryMinus(col 0:tinyint) -> 21:tinyint, LongColModuloLongColumn(col 22:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 22:tinyint) -> 23:tinyint - Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -316,7 +316,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -336,13 +336,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 25
- Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out
index 38c59bd4c2..568e87d177 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -73,7 +73,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0)))
predicate: ((cboolean1 is not null and (cdouble = 988888.0D)) or (cfloat < -6432) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -82,7 +82,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 15:double, DoubleColAddDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 18:float, DoubleColUnaryMinus(col 4:float) -> 20:float, DoubleColAddDoubleColumn(col 21:double, col 23:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 21:double, col 23:float) -> 22:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
sort order: ++++++++++++++
@@ -91,7 +91,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -117,13 +117,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -272,7 +272,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -281,7 +281,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0)))
predicate: ((cboolean1 is not null and (cdouble = 988888.0D)) or (cfloat < -6432) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -290,7 +290,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 15:double, DoubleColAddDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 18:float, DoubleColUnaryMinus(col 4:float) -> 20:float, DoubleColAddDoubleColumn(col 21:double, col 23:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 21:double, col 23:float) -> 22:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
sort order: ++++++++++++++
@@ -299,7 +299,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -319,13 +319,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out
index fa317902b4..5eac26ac96 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out
@@ -53,7 +53,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -63,7 +63,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a)))
predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean)
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -72,7 +72,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6, 5, 8, 13]
selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
Group By Vectorization:
@@ -86,7 +86,7 @@ STAGE PLANS:
keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
sort order: +++
@@ -96,7 +96,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
Execution mode: vectorized
Map Vectorization:
@@ -124,14 +124,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_decimal_date.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_decimal_date.q.out
index 716589339e..4edac17b48 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_decimal_date.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_decimal_date.q.out
@@ -31,7 +31,7 @@ STAGE PLANS:
TableScan
alias: date_decimal_test_parquet
filterExpr: (cint is not null and cdouble is not null) (type: boolean)
- Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -40,7 +40,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:double))
predicate: (cdouble is not null and cint is not null) (type: boolean)
- Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date), cdecimal (type: decimal(20,10))
outputColumnNames: _col0, _col1
@@ -48,19 +48,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3]
- Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_div0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_div0.q.out
index 1baa650dcb..35ce098227 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_div0.q.out
@@ -18,7 +18,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -29,19 +29,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13]
selectExpressions: DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -194,7 +194,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cbigint > 0L) and (cbigint < 100000000L)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -203,7 +203,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000))
predicate: ((cbigint < 100000000L) and (cbigint > 0L)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
outputColumnNames: _col0, _col1, _col2
@@ -212,7 +212,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13, 16, 18]
selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type: double)
sort order: ++
@@ -221,7 +221,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: decimal(22,21))
Execution mode: vectorized
@@ -242,13 +242,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: decimal(22,21))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -393,7 +393,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cdouble >= -500.0D) and (cdouble < -199.0D)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -402,7 +402,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0))
predicate: ((cdouble < -199.0D) and (cdouble >= -500.0D)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cdouble + 200.0D) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0D)) (type: double), ((cdouble + 200.0D) / (cdouble + 200.0D)) (type: double), (3.0D / (cdouble + 200.0D)) (type: double), (1.2D / (cdouble + 200.0D)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col4, _col5
@@ -411,7 +411,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13, 16, 17, 15, 18]
selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: double)
sort order: ++
@@ -420,7 +420,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
Execution mode: vectorized
@@ -441,13 +441,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: double), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
index dc9f66361c..3e4ab16fa6 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
@@ -18,20 +18,20 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cdouble (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 7
- Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 3388 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 3388 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -90,7 +90,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -100,7 +100,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
outputColumnNames: _col0, _col1, _col2
@@ -108,7 +108,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5, 1]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
@@ -117,7 +117,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col2 (type: smallint)
Execution mode: vectorized
@@ -144,13 +144,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -210,7 +210,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -222,7 +222,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 13]
selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1), count(_col1)
Group By Vectorization:
@@ -236,7 +236,7 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
@@ -246,7 +246,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: vectorized
@@ -275,17 +275,17 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -345,7 +345,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -356,7 +356,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -368,7 +368,7 @@ STAGE PLANS:
keys: ctinyint (type: tinyint)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
@@ -378,7 +378,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Execution mode: vectorized
Map Vectorization:
@@ -405,13 +405,13 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -471,22 +471,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double)
outputColumnNames: ctinyint, cdouble
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT cdouble)
keys: ctinyint (type: tinyint), cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Map Vectorization:
enabled: true
@@ -504,13 +504,13 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -600,7 +600,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -610,7 +610,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(ctinyint)
Group By Vectorization:
@@ -624,7 +624,7 @@ STAGE PLANS:
keys: cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
@@ -634,7 +634,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -662,7 +662,7 @@ STAGE PLANS:
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -685,7 +685,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Execution mode: vectorized
Map Vectorization:
@@ -711,13 +711,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_offset_limit.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_offset_limit.q.out
index 9455b941ed..61fdab1d61 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_offset_limit.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_offset_limit.q.out
@@ -18,21 +18,21 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cdouble (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 2
Offset of rows: 3
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 968 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 968 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -86,7 +86,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -95,7 +95,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
outputColumnNames: _col0, _col1, _col2
@@ -103,7 +103,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5, 1]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
@@ -112,7 +112,7 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: smallint)
Execution mode: vectorized
@@ -133,14 +133,14 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 3
Offset of rows: 10
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_part_project.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_part_project.q.out
index 0786685a3b..486dbe7f5e 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_part_project.q.out
@@ -64,15 +64,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet_part_n0
- Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cdouble + 2.0D) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -92,13 +92,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out
index b29ca9a223..c2c6c7ae71 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out
@@ -18,14 +18,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint)
outputColumnNames: cbigint
- Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cbigint), count(cbigint)
mode: hash
diff --git a/ql/src/test/results/clientpositive/spark/parquet_join.q.out b/ql/src/test/results/clientpositive/spark/parquet_join.q.out
index 50f69432cd..9bd4b217a6 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_join.q.out
@@ -81,37 +81,37 @@ STAGE PLANS:
TableScan
alias: p1
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
alias: p2
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), myvalue (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
@@ -122,14 +122,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col2
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -172,14 +172,14 @@ STAGE PLANS:
TableScan
alias: p1
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: int)
@@ -196,14 +196,14 @@ STAGE PLANS:
TableScan
alias: p2
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), myvalue (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -213,14 +213,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col2
input vertices:
0 Map 1
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -301,14 +301,14 @@ STAGE PLANS:
TableScan
alias: p2
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
@@ -316,14 +316,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1, _col3
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index 0f4d95231a..08e3466e80 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -34,7 +34,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -45,7 +45,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
Group By Vectorization:
@@ -213,7 +213,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -224,7 +224,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(ctinyint)
Group By Vectorization:
@@ -401,11 +401,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
mode: hash
@@ -540,7 +540,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -551,7 +551,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(cbigint), max(cbigint), count(cbigint), count()
Group By Vectorization:
@@ -719,7 +719,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -730,7 +730,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cbigint)
Group By Vectorization:
@@ -907,11 +907,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
mode: hash
@@ -1046,7 +1046,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -1057,7 +1057,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [4]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(cfloat), max(cfloat), count(cfloat), count()
Group By Vectorization:
@@ -1225,7 +1225,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -1236,7 +1236,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [4]
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cfloat)
Group By Vectorization:
@@ -1413,11 +1413,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
mode: hash
@@ -1591,7 +1591,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 like '%b%') or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -1601,7 +1601,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterLongColEqualLongScalar(col 11:boolean, val 1), FilterLongColEqualLongScalar(col 0:int, val 3569)(children: col 0:tinyint)))
predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -1610,7 +1610,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4, 0, 14, 17]
selectExpressions: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 16:double) -> 17:double
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
Group By Vectorization:
@@ -1787,14 +1787,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0)) or (cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%')) (type: boolean)
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -1828,7 +1828,7 @@ STAGE PLANS:
name default.alltypesparquet
numFiles 1
numRows 12288
- rawDataSize 147456
+ rawDataSize 0
serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2,
bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -1850,7 +1850,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30598,22 +30598,22 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 45) and (cfloat = 3.02)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 49) and (cfloat = 3.5))) (type: boolean) - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30651,7 +30651,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30673,7 +30673,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30715,22 +30715,22 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean) - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30767,7 +30767,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30789,7 +30789,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30831,22 +30831,22 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 45) or (cfloat = 3.02)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 49) or (cfloat = 3.5))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30884,7 +30884,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30906,7 +30906,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -30949,24 +30949,24 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (cstring1) IN ('biology', 'history', 'topology') (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean) - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -30991,7 +30991,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -31013,7 +31013,7 @@ STAGE PLANS: name default.alltypesparquet numFiles 1 numRows 12288 - rawDataSize 147456 + rawDataSize 0 serialization.ddl struct alltypesparquet { byte 
ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe @@ -31033,16 +31033,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -31053,13 +31053,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 5c79743429..812424a92d 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -60,7 +60,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -70,7 +70,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterLongColGreaterLongScalar(col 11:boolean, val 0)), FilterLongColLessLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint), FilterLongColGreaterLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int), FilterLongColLessLongScalar(col 10:boolean, val 0)) predicate: 
(((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -79,7 +79,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14] selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out index dfb187f7b3..a3ae327366 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out @@ -64,7 +64,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -74,7 +74,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(6,2), val -5638.15)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(6,2))), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(11,4)), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like 
'%a'))) or (cstring2 <= '10')) (type: boolean) - Statistics: Num rows: 9557 Data size: 114684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 4628417 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -83,13 +83,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 8, 0, 10, 6, 13, 17, 16, 18, 20, 21, 19, 23, 24, 26] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 18:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 18:double) -> 16:double, DoubleColUnaryMinus(col 5:double) -> 18:double, DoubleColModuloDoubleColumn(col 19:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 21:smallint, DoubleColUnaryMinus(col 5:double) -> 19:double, LongColMultiplyLongColumn(col 3:bigint, col 22:bigint)(children: col 22:smallint) -> 23:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 25:double)(children: DoubleColAddDoubleColumn(col 5:double, col 24:double)(children: CastLongToDouble(col 1:smallint) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 5:double) -> 25:double) -> 26:double - Statistics: Num rows: 9557 Data size: 114684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 4628417 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 9557 Data size: 114684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 4628417 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out index 995316e2d1..d557dbb1e8 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out @@ -46,7 +46,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE 
TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -56,7 +56,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a))) predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -65,13 +65,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 10, 5, 8, 13, 14, 15, 17, 16] selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1:int)(children: col 1:smallint) -> 13:int, DoubleColSubtractDoubleScalar(col 5:double, val 9763215.5639) -> 14:double, DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleScalar(col 16:double, val 6981.0)(children: DoubleColUnaryMinus(col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleScalar(col 5:double, val -5638.15) -> 16:double - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index de7abe08df..f77b8c2abb 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -83,7 +83,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 
12:ROW__ID:struct] @@ -93,7 +93,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint)))) predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 1818047 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -102,7 +102,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double - Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 1818047 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: @@ -116,7 +116,7 @@ STAGE PLANS: keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 1818047 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -127,7 +127,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 1818047 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized Map Vectorization: @@ -174,7 +174,7 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 909023 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 @@ -183,7 +183,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 20:double) -> 15:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 22:double, DecimalScalarAddDecimalColumn(val -5638.15, col 23:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 23:decimal(19,0)) -> 24:decimal(22,2), DoubleColDivideDoubleColumn(col 21:double, col 25:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 21:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 25:double) -> 26:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 25:double) -> 21:double) -> 25:double, DoubleColAddDoubleColumn(col 27:double, col 28:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 27:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 28:double) -> 21:double, FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double - Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 909023 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ @@ -193,7 +193,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] - Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 909023 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 Execution mode: vectorized @@ -219,13 +219,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18] selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp - Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 909023 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 909023 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index 09b50c77b5..e3ac8fc462 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -85,7 +85,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan 
Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -95,7 +95,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(11,4)))) predicate: (((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -104,7 +104,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -118,7 +118,7 @@ STAGE PLANS: keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -129,7 +129,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -176,7 +176,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 @@ -185,7 +185,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 
11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ @@ -195,7 +195,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -220,19 +220,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -439,7 +439,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or 
((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -448,7 +448,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(11,4)))) predicate: (((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -457,7 +457,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -471,7 +471,7 @@ STAGE PLANS: keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -480,7 +480,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 1322128 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -514,7 +514,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 @@ -523,7 +523,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: 
DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ @@ -531,7 +531,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -549,19 +549,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19360 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index ca43d13c00..8cb1f869c8 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -85,7 +85,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -95,7 +95,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp)), FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -257), FilterDoubleColLessDoubleColumn(col 4:float, col 13:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float))) predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -104,7 +104,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column 
stats: NONE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: @@ -118,7 +118,7 @@ STAGE PLANS: keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -129,7 +129,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 293483 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -176,7 +176,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 @@ -185,7 +185,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, 
DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 13:double) -> 15:double) -> 13:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 15:float, DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleColUnaryMinus(col 8:float) -> 20:float, DoubleColDivideDoubleScalar(col 22:double, val 10.175)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 21:double) -> 22:double) -> 21:double, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleScalar(col 24:double, val 10.175)(children: DoubleColUnaryMinus(col 23:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 23:double) -> 24:double) -> 23:double) -> 24:double, DoubleScalarModuloDoubleColumn(val -1.389, col 23:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 23:double) -> 25:double) -> 23:double, IfExprNullCondExpr(col 18:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 23:double) -> 25:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 23:double, DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColModuloDoubleScalar(col 30:double, val 10.175)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 27:boolean, null, col 
32:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 30:double) -> 34:double - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ @@ -195,7 +195,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized @@ -220,13 +220,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 146741 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 8497a7e541..cb2a135036 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -81,7 +81,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -91,7 +91,7 @@ STAGE 
PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0))) predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -100,7 +100,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: @@ -114,7 +114,7 @@ STAGE PLANS: keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -125,7 +125,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 12288 Data size: 
147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -154,15 +154,15 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Reduce Vectorization: @@ -173,10 +173,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: 
decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index a85f4d32ab..6e50cbc5ce 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -58,7 +58,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -68,7 +68,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -77,7 +77,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -91,7 +91,7 @@ STAGE PLANS: keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4096 Data size: 49152 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -102,7 +102,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [3, 4, 5, 6] - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -149,7 +149,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -158,13 +158,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 
14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index 4ee2961cc2..9e79410d06 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -66,7 +66,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cbigint > -23L) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -76,7 +76,7 @@ STAGE PLANS: native: 
true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float))) predicate: (((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and (cbigint > -23L)) (type: boolean) - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -85,7 +85,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 14, 17, 19, 20, 22, 18] selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 16:bigint, DoubleColUnaryMinus(col 5:double) -> 14:double, DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 17:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double) -> 17:double, DoubleColDivideDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 18:double) -> 20:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 21:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 23:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 18:double) -> 23:double) -> 18:double - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -95,7 +95,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] - Statistics: Num rows: 
4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization: @@ -136,13 +136,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index e5a82097cf..a0073686cc 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -64,7 +64,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -74,7 +74,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375)), FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 13:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterLongColLessLongScalar(col 2:int, val 359)))) predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 2313966 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint 
(type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -83,7 +83,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double - Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 2313966 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index 8c2cf06f82..a8da697e81 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -69,7 +69,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -79,7 +79,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float), FilterDecimalColNotEqualDecimalScalar(col 14:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 13:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 13:double), FilterDecimalColGreaterEqualDecimalScalar(col 15:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) - Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 1212193 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: 
double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -88,7 +88,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double - Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 1212193 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index adf6d603b5..8a3fe8f754 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -64,7 +64,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToInteger(csmallint) >= cint) or ((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -74,7 +74,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterDoubleColGreaterDoubleScalar(col 5:double, val 79.553)), FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 3:bigint, val -563), FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessEqualDoubleScalar(col 5:double, val -3728.0)))) predicate: (((UDFToInteger(ctinyint) 
<= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -83,7 +83,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 5, 0, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out index 5f2e2ca298..47a6d07d63 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out @@ -58,7 +58,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -68,7 +68,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11:boolean), FilterStringColLikeStringScalar(col 6:string, pattern %b%)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), SelectColumnIsNotNull(col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern a))) predicate: (((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a')) or (cboolean2 is not null and (cstring1 like '%b%'))) (type: boolean) - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint @@ -76,7 +76,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 9216 Data size: 110592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 4463272 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out 
b/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out index 5b7f309b56..62cb622281 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out @@ -58,7 +58,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((ctinyint <> 0Y) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -68,7 +68,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10:boolean, val 0), FilterLongColGreaterEqualLongColumn(col 11:boolean, col 10:boolean)), FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:bigint), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %a), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -257.0))))) predicate: ((((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257)))) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11605 Data size: 5620255 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28D / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -77,13 +77,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 4, 6, 13, 14, 15, 16, 17, 18, 19, 20, 22] selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1:int)(children: col 1:smallint) -> 13:int, LongColUnaryMinus(col 1:smallint) -> 14:smallint, DoubleColUnaryMinus(col 4:float) -> 15:float, DoubleScalarDivideDoubleColumn(val -26.28, col 4:double)(children: col 4:float) -> 16:double, DoubleColMultiplyDoubleScalar(col 4:float, val 359.0) -> 17:float, LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 18:int, DoubleColUnaryMinus(col 5:double) -> 19:double, LongColSubtractLongScalar(col 0:int, val -75)(children: col 0:tinyint) -> 20:int, LongScalarMultiplyLongColumn(val 762, col 21:int)(children: LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 21:int) -> 22:int - Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11605 Data size: 5620255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: 
                className: VectorFileSinkOperator
                native: false
-            Statistics: Num rows: 11605 Data size: 139260 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 11605 Data size: 5620255 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
index 552b5d19c7..d106d8fc44 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((ctinyint <> 0Y) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D)))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -82,7 +82,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))))
             predicate: (((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and (ctinyint <> 0Y)) (type: boolean)
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -91,7 +91,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23]
                 selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 14:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 15:int, LongColUnaryMinus(col 1:smallint) -> 16:smallint, LongColUnaryMinus(col 0:tinyint) -> 17:tinyint, LongColAddLongScalar(col 18:int, val 17)(children: col 18:tinyint) -> 19:int, LongColMultiplyLongColumn(col 3:bigint, col 18:bigint)(children: col 18:smallint) -> 20:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColUnaryMinus(col 0:tinyint) -> 21:tinyint, LongColModuloLongColumn(col 22:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 22:tinyint) -> 23:tinyint
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
             sort order: +++++++++++++++
@@ -101,7 +101,7 @@ STAGE PLANS:
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 valueColumnNums: []
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
           Execution mode: vectorized
           Map Vectorization:
@@ -142,19 +142,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 25
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -321,7 +321,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((ctinyint <> 0Y) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D)))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Filter Operator
@@ -330,7 +330,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:tinyint, val 0), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))))
             predicate: (((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and (ctinyint <> 0Y)) (type: boolean)
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -339,7 +339,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23]
                 selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 14:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 15:int, LongColUnaryMinus(col 1:smallint) -> 16:smallint, LongColUnaryMinus(col 0:tinyint) -> 17:tinyint, LongColAddLongScalar(col 18:int, val 17)(children: col 18:tinyint) -> 19:int, LongColMultiplyLongColumn(col 3:bigint, col 18:bigint)(children: col 18:smallint) -> 20:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColUnaryMinus(col 0:tinyint) -> 21:tinyint, LongColModuloLongColumn(col 22:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 22:tinyint) -> 23:tinyint
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
             sort order: +++++++++++++++
@@ -347,7 +347,7 @@ STAGE PLANS:
                 className: VectorReduceSinkObjectHashOperator
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
           Execution mode: vectorized
           Map Vectorization:
@@ -375,19 +375,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
-            Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 5461 Data size: 2644740 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 25
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 12100 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
index 9802ef2652..0de629b0b2 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0D))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -78,7 +78,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0)))
             predicate: ((cboolean1 is not null and (cdouble = 988888.0D)) or (cfloat < -6432) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -87,7 +87,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
                 selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 15:double, DoubleColAddDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 18:float, DoubleColUnaryMinus(col 4:float) -> 20:float, DoubleColAddDoubleColumn(col 21:double, col 23:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 21:double, col 23:float) -> 22:double
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
             sort order: ++++++++++++++
@@ -97,7 +97,7 @@ STAGE PLANS:
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 valueColumnNums: []
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
           Execution mode: vectorized
           Map Vectorization:
@@ -138,19 +138,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -304,7 +304,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0D))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Filter Operator
@@ -313,7 +313,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:string), FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 13:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double)), FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0)))
             predicate: ((cboolean1 is not null and (cdouble = 988888.0D)) or (cfloat < -6432) or (cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -322,7 +322,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
                 selectExpressions: DoubleColUnaryMinus(col 5:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 15:double, DoubleColAddDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 18:double)(children: DoubleColUnaryMinus(col 5:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 18:float, DoubleColUnaryMinus(col 4:float) -> 20:float, DoubleColAddDoubleColumn(col 21:double, col 23:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 21:double, col 23:float) -> 22:double
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
             sort order: ++++++++++++++
@@ -330,7 +330,7 @@ STAGE PLANS:
                 className: VectorReduceSinkObjectHashOperator
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
           Execution mode: vectorized
           Map Vectorization:
@@ -358,19 +358,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
index a85f4d32ab..6e50cbc5ce 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
@@ -58,7 +58,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -68,7 +68,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a)))
             predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean)
-            Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -77,7 +77,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [6, 5, 8, 13]
                 selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
-            Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
             Group By Vectorization:
@@ -91,7 +91,7 @@ STAGE PLANS:
             keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-            Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
             sort order: +++
@@ -102,7 +102,7 @@ STAGE PLANS:
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 valueColumnNums: [3, 4, 5, 6]
-            Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
           Execution mode: vectorized
           Map Vectorization:
@@ -149,7 +149,7 @@ STAGE PLANS:
             keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
             mode: mergepartial
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-            Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -158,13 +158,13 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17]
                 selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double
-            Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2048 Data size: 991838 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
index 040fd997d1..00e25889ed 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
@@ -34,7 +34,7 @@ STAGE PLANS:
           TableScan
             alias: date_decimal_test_parquet
             filterExpr: (cint is not null and cdouble is not null) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Filter Operator
@@ -43,7 +43,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:double))
             predicate: (cdouble is not null and cint is not null) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cdate (type: date), cdecimal (type: decimal(20,10))
             outputColumnNames: _col0, _col1
@@ -51,19 +51,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [2, 3]
-            Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 10
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
index f448a3e8bb..de5bb311a4 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
         Map Operator Tree:
           TableScan
             alias: alltypesparquet
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Select Operator
@@ -32,19 +32,19 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [13]
                 selectExpressions: DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 100
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -202,7 +202,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((cbigint > 0L) and (cbigint < 100000000L)) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Filter Operator
@@ -211,7 +211,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000))
             predicate: ((cbigint < 100000000L) and (cbigint > 0L)) (type: boolean)
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
             outputColumnNames: _col0, _col1, _col2
@@ -220,7 +220,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [13, 16, 18]
                 selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 17:decimal(19,0))(children: CastLongToDecimal(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 17:decimal(19,0)) -> 18:decimal(22,21)
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: bigint), _col1 (type: double)
             sort order: ++
@@ -228,7 +228,7 @@ STAGE PLANS:
                 className: VectorReduceSinkObjectHashOperator
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col2 (type: decimal(22,21))
           Execution mode: vectorized
@@ -257,19 +257,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2]
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 100
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -419,7 +419,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((cdouble >= -500.0D) and (cdouble < -199.0D)) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Filter Operator
@@ -428,7 +428,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0))
             predicate: ((cdouble < -199.0D) and (cdouble >= -500.0D)) (type: boolean)
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: (cdouble + 200.0D) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0D)) (type: double), ((cdouble + 200.0D) / (cdouble + 200.0D)) (type: double), (3.0D / (cdouble + 200.0D)) (type: double), (1.2D / (cdouble + 200.0D)) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col4, _col5
@@ -437,7 +437,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [13, 16, 17, 15, 18]
                 selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 17:double, DoubleScalarDivideDoubleColumn(val 3.0, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 15:double, DoubleScalarDivideDoubleColumn(val 1.2, col 14:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 14:double) -> 18:double
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: double), _col1 (type: double)
             sort order: ++
@@ -445,7 +445,7 @@ STAGE PLANS:
                 className: VectorReduceSinkObjectHashOperator
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
           Execution mode: vectorized
@@ -474,19 +474,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2, 1, 3, 4]
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 100
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 100 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
index b2c0a64acb..dd473b06c8 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -21,20 +21,20 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cbigint (type: bigint), cdouble (type: double)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 7
-            Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 7 Data size: 3388 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 7 Data size: 3388 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -98,7 +98,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ctinyint is not null (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -108,7 +108,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
             predicate: ctinyint is not null (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
             outputColumnNames: _col0, _col1, _col2
@@ -116,7 +116,7 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 5, 1]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: tinyint), _col1 (type: double)
             sort order: ++
@@ -126,7 +126,7 @@ STAGE PLANS:
                 native: true
                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                valueColumnNums: [1]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.3
             value expressions: _col2 (type: smallint)
           Execution mode: vectorized
@@ -168,19 +168,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -245,7 +245,7 @@ STAGE PLANS:
        Map Operator Tree:
           TableScan
             alias: alltypesparquet
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -257,7 +257,7 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [0, 13]
                 selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: sum(_col1), count(_col1)
             Group By Vectorization:
@@ -271,7 +271,7 @@ STAGE PLANS:
             keys: _col0 (type: tinyint)
             mode: hash
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: tinyint)
             sort order: +
@@ -283,7 +283,7 @@ STAGE PLANS:
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 partitionColumnNums: [0]
                 valueColumnNums: [1, 2]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.3
             value expressions: _col1 (type: double), _col2 (type: bigint)
           Execution mode: vectorized
@@ -331,7 +331,7 @@ STAGE PLANS:
             keys: KEY._col0 (type: tinyint)
             mode: mergepartial
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
             outputColumnNames: _col0, _col1
@@ -340,19 +340,19 @@ STAGE PLANS:
                 native: true
                 projectedOutputColumnNums: [0, 3]
                 selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -417,7 +417,7 @@ STAGE PLANS:
        Map Operator Tree:
           TableScan
             alias: alltypesparquet
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -428,7 +428,7 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             Group By Vectorization:
                 className: VectorGroupByOperator
@@ -440,7 +440,7 @@ STAGE PLANS:
             keys: ctinyint (type: tinyint)
             mode: hash
             outputColumnNames: _col0
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: tinyint)
             sort order: +
@@ -451,7 +451,7 @@ STAGE PLANS:
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 valueColumnNums: []
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.3
           Execution mode: vectorized
           Map Vectorization:
@@ -496,19 +496,19 @@ STAGE PLANS:
             keys: KEY._col0 (type: tinyint)
             mode: mergepartial
             outputColumnNames: _col0
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -573,7 +573,7 @@ STAGE PLANS:
        Map Operator Tree:
           TableScan
             alias: alltypesparquet
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
            TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -584,7 +584,7 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 5]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             Group By Vectorization:
                 className: VectorGroupByOperator
@@ -596,7 +596,7 @@ STAGE PLANS:
             keys: ctinyint (type: tinyint), cdouble (type: double)
             mode: hash
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: tinyint), _col1 (type: double)
             sort order: ++
@@ -608,7 +608,7 @@ STAGE PLANS:
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 partitionColumnNums: [0]
                 valueColumnNums: []
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Execution mode: vectorized
           Map Vectorization:
              enabled: true
@@ -652,7 +652,7 @@ STAGE PLANS:
             keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
             mode: mergepartial
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: count(_col1)
             Group By Vectorization:
@@ -666,19 +666,19 @@ STAGE PLANS:
             keys: _col0 (type: tinyint)
             mode: complete
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 3072 Data size: 1487757 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -773,7 +773,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ctinyint is not null (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
@@ -783,7 +783,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
             predicate: ctinyint is not null (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: sum(ctinyint)
             Group By Vectorization:
@@ -797,7 +797,7 @@ STAGE PLANS:
             keys: cdouble (type: double)
             mode: hash
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: double)
             sort order: +
@@ -808,7 +808,7 @@ STAGE PLANS:
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 valueColumnNums: [1]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: bigint)
           Execution mode: vectorized
           Map Vectorization:
@@ -855,7 +855,7 @@ STAGE PLANS:
             keys: KEY._col0 (type: double)
             mode: mergepartial
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col1 (type: bigint), _col0 (type: double)
             sort order: ++
@@ -865,7 +865,7 @@ STAGE PLANS:
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 valueColumnNums: []
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.3
        Reducer 3
           Execution mode: vectorized
@@ -890,19 +890,19 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [1, 0]
-            Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6144 Data size: 2975515 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 20
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 20 Data size: 9680 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
index 3c616d9f1f..44f4819005 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
@@ -21,21 +21,21 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cbigint (type: bigint), cdouble (type: double)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1365 Data size: 661064 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 2
             Offset of rows: 3
-            Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2 Data size: 968 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2 Data size: 968 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -94,7 +94,7 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: ctinyint is not null (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TableScan Vectorization:
                 native: true
           Filter Operator
@@ -103,7 +103,7 @@ STAGE PLANS:
                 native: true
                 predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
             predicate: ctinyint is not null (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
             outputColumnNames: _col0, _col1, _col2
@@ -111,7 +111,7 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 5, 1]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: tinyint), _col1 (type: double)
             sort order: ++
@@ -119,7 +119,7 @@ STAGE PLANS:
                 className: VectorReduceSinkObjectHashOperator
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col2 (type: smallint)
           Execution mode: vectorized
@@ -148,20 +148,20 @@ STAGE PLANS:
                 className: VectorSelectOperator
                 native: true
                 projectedOutputColumnNums: [0, 1, 2]
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 3
             Limit Vectorization:
                 className: VectorLimitOperator
                 native: true
             Offset of rows: 10
-            Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 3 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             File Sink Vectorization:
                 className: VectorFileSinkOperator
                 native: false
-            Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 3 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
index c2b34c8785..35ba733fdf 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
@@ -69,15 +69,15 @@ STAGE PLANS:
        Map Operator Tree:
           TableScan
             alias: alltypesparquet_part_n0
-            Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: (cdouble + 2.0D) (type: double)
             outputColumnNames: _col0
-            Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
           Reduce Output Operator
             key expressions: _col0 (type: double)
             sort order: +
-            Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
           Execution mode: vectorized
           Map Vectorization:
@@ -101,13 +101,13 @@ STAGE PLANS:
           Select Operator
             expressions: KEY.reducesinkkey0 (type: double)
             outputColumnNames: _col0
-            Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 200 Data size: 10942 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 10
-            Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
index 212a83e8f9..0474a60fff 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
@@ -23,14 +23,14 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12288 Data size: 5951030 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
             predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-            Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: cbigint (type: bigint)
             outputColumnNames: cbigint
-            Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 1983676 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: sum(cbigint), count(cbigint)
             mode: hash
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index cecee578db..315288c547 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -6404,11 +6404,11 @@ STAGE PLANS:
        Map Operator Tree:
           TableScan
             alias: srcpart_parquet
-            Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: ds (type: string), hr (type: int)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2000 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
           Map Join Operator
             condition map:
                 Inner Join 0 to 1
@@ -6417,7 +6417,7 @@ STAGE PLANS:
                 1 _col0 (type: string), UDFToDouble(_col2) (type: double)
             input vertices:
                 1 Map 3
-            Statistics: Num rows: 2200 Data size: 4400 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2200 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             aggregations: count()
             mode: hash
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
index 16ebaa59ec..32f5e22bf2 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
@@ -74,20 +74,20 @@ STAGE PLANS:
           TableScan
             alias: alltypes_parquet_n0
             filterExpr: (cint = 528534767) (type: boolean)
-            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num
rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -170,22 +170,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet_n0 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -211,14 +211,14 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -408,20 +408,20 @@ STAGE PLANS: TableScan alias: alltypes_parquet_n0 filterExpr: (cint = 528534767) (type: boolean) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -498,22 +498,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet_n0 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 
(type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Map Vectorization: enabled: false @@ -533,14 +533,14 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -730,20 +730,20 @@ STAGE PLANS: TableScan alias: alltypes_parquet_n0 filterExpr: (cint = 528534767) (type: boolean) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint = 528534767) (type: boolean) - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -826,22 +826,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet_n0 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -867,14 +867,14 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1525,17 +1525,17 @@ STAGE PLANS: TableScan alias: parquettbl filterExpr: (UDFToInteger((t1 + t2)) > 10) (type: boolean) - Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 126 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToInteger((t1 + t2)) > 10) (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t1 (type: tinyint), t2 (type: tinyint), (t1 + t2) (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out b/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out index 5ee04f4119..9fa54d01bd 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out 
+++ b/ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out @@ -135,14 +135,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), m1 (type: map) outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -213,9 +213,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -297,22 +297,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: cint - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -338,10 +338,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -391,22 +391,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_project_test - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: m1['color'] (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Map Vectorization: enabled: true @@ -428,10 +428,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -502,9 +502,9 @@ STAGE PLANS: TableScan alias: parquet_nullsplit filterExpr: (len = '1') (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out b/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out index 35a8eefac6..5ef54a7418 100644 --- a/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out +++ b/ql/src/test/results/clientpositive/vectorization_numeric_overflows.q.out @@ -124,7 +124,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((cint1 - 2) > 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -133,7 +133,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 13:int, val 0)(children: LongColSubtractLongScalarChecked(col 4:int, val 2) -> 13:int) predicate: ((cint1 - 2) > 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint1 (type: int), (cint1 - 2) (type: int) outputColumnNames: _col0, _col1 @@ -142,7 +142,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 13] selectExpressions: LongColSubtractLongScalarChecked(col 4:int, val 2) -> 13:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -151,7 +151,7 @@ STAGE PLANS: native: 
false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Map Vectorization: @@ -171,10 +171,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -229,7 +229,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((cint2 + 2) < 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -238,7 +238,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColLessLongScalar(col 13:int, val 0)(children: LongColAddLongScalarChecked(col 5:int, val 2) -> 13:int) predicate: ((cint2 + 2) < 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint2 (type: int), (cint2 + 2) (type: int) outputColumnNames: _col0, _col1 @@ -247,7 +247,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 13] selectExpressions: LongColAddLongScalarChecked(col 5:int, val 2) -> 13:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -256,7 +256,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Map Vectorization: @@ -276,10 +276,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -332,7 +332,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((cint2 * 2) < 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -341,7 +341,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColLessLongScalar(col 13:int, val 0)(children: LongColMultiplyLongScalarChecked(col 5:int, val 2) -> 13:int) predicate: ((cint2 * 2) < 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint2 (type: int), (cint2 * 2) (type: int) outputColumnNames: _col0, _col1 @@ -350,7 +350,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 13] selectExpressions: LongColMultiplyLongScalarChecked(col 5:int, val 2) -> 13:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -359,7 +359,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Map Vectorization: @@ -379,10 +379,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -435,7 +435,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((ctinyint1 - 2Y) > 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -444,7 +444,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 13:tinyint, val 0)(children: LongColSubtractLongScalarChecked(col 0:tinyint, val 2) -> 13:tinyint) predicate: ((ctinyint1 - 2Y) > 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint1 (type: tinyint), (ctinyint1 - 2Y) (type: tinyint) outputColumnNames: _col0, _col1 @@ -453,7 +453,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 13] selectExpressions: 
LongColSubtractLongScalarChecked(col 0:tinyint, val 2) -> 13:tinyint - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -462,7 +462,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -482,10 +482,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -540,7 +540,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((ctinyint2 + 2Y) < 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -549,7 +549,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColLessLongScalar(col 13:tinyint, val 0)(children: LongColAddLongScalarChecked(col 1:tinyint, val 2) -> 13:tinyint) predicate: ((ctinyint2 + 2Y) < 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint2 (type: tinyint), (ctinyint2 + 2) (type: int) outputColumnNames: _col0, _col1 @@ -558,7 +558,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 13] selectExpressions: LongColAddLongScalarChecked(col 1:tinyint, val 2) -> 13:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -567,7 +567,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Map Vectorization: @@ -587,10 +587,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 
Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -643,7 +643,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((csmallint2 * 2S) < 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -652,7 +652,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColLessLongScalar(col 13:smallint, val 0)(children: LongColMultiplyLongScalarChecked(col 3:smallint, val 2) -> 13:smallint) predicate: ((csmallint2 * 2S) < 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint2 (type: smallint), (csmallint2 * 2) (type: int) outputColumnNames: _col0, _col1 @@ -661,7 +661,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 13] selectExpressions: LongColMultiplyLongScalarChecked(col 3:smallint, val 2) -> 13:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + @@ -670,7 +670,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Map Vectorization: @@ -690,10 +690,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -735,27 +735,27 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((csmallint2 * 2S) < 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((csmallint2 * 2S) < 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint2 (type: 
smallint), (csmallint2 * 2) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,27 +815,27 @@ STAGE PLANS: TableScan alias: parquettable filterExpr: ((t1 - t2) < 50) (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((t1 - t2) < 50) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t1 (type: tinyint), t2 (type: tinyint), (t1 - t2) (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: tinyint) sort order: - - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: tinyint), KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -884,7 +884,7 @@ STAGE PLANS: TableScan alias: parquettable filterExpr: ((i1 - i2) < 50) (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -893,7 +893,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColLessLongScalar(col 5:int, val 50)(children: LongColSubtractLongColumnChecked(col 2:int, col 3:int) -> 5:int) predicate: ((i1 - i2) < 50) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i1 (type: int), i2 (type: int), (i1 - i2) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -902,7 +902,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 3, 5] selectExpressions: LongColSubtractLongColumnChecked(col 2:int, col 3:int) -> 5:int - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: - @@ -911,7 +911,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized Map Vectorization: @@ -931,10 +931,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1086,7 +1086,7 @@ STAGE PLANS: TableScan alias: test_overflow filterExpr: ((csmallint1 pmod 16385S) > 0) (type: boolean) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2220 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Filter Operator @@ -1095,7 +1095,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 13:smallint, val 0)(children: PosModLongToLong(col 2, divisor 16385) -> 13:smallint) predicate: ((csmallint1 pmod 16385S) > 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint1 (type: tinyint), ctinyint2 (type: tinyint), csmallint1 (type: smallint), csmallint2 (type: smallint), cint1 (type: int), cint2 (type: int), cbigint1 (type: bigint), cbigint2 (type: bigint), cfloat1 (type: float), cfloat2 (type: float), cdouble1 (type: double), cdouble2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -1103,7 +1103,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -1112,7 +1112,7 @@ STAGE PLANS: native: false nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: smallint), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: float), _col9 (type: float), _col10 (type: double), _col11 (type: double) Execution mode: vectorized Map Vectorization: @@ -1132,10 +1132,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: smallint), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: bigint), VALUE._col7 (type: float), VALUE._col8 (type: float), VALUE._col9 (type: double), VALUE._col10 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
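
Note on the golden-file updates above: every regenerated plan swaps a small, estimated Data size for a much larger one (for example, 12288 rows goes from 147456 bytes, exactly 12 per row, to 5951030 bytes), because the raw data size for Parquet tables is now reported from the metadata of the files actually written rather than from a flat per-row estimate. For readers who want to see where such a figure can come from, below is a minimal, self-contained Java sketch that sums the uncompressed row-group sizes recorded in a Parquet footer. The class and method names (ParquetFooterStats, footerStats) are illustrative inventions, not part of this patch; the parquet-hadoop and Hive calls used are real public API.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class ParquetFooterStats {
  // Illustrative helper, not from the patch: derive SerDeStats for one
  // Parquet file by reading only its footer (no data pages are touched).
  public static SerDeStats footerStats(Path file, Configuration conf) throws IOException {
    long rawDataSize = 0;
    long rowCount = 0;
    try (ParquetFileReader reader =
        ParquetFileReader.open(HadoopInputFile.fromPath(file, conf))) {
      for (BlockMetaData block : reader.getFooter().getBlocks()) {
        // getTotalByteSize() is the uncompressed size of the row group,
        // which matches what "raw data size" means in Hive statistics.
        rawDataSize += block.getTotalByteSize();
        rowCount += block.getRowCount();
      }
    }
    SerDeStats stats = new SerDeStats();
    stats.setRawDataSize(rawDataSize);
    stats.setRowCount(rowCount);
    return stats;
  }
}

Because only the footer is read, the extra cost at writer-close time is one small metadata read per file, which is why stats like these can be collected without rescanning the written data.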