diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
index e0018a5bc0..0534a285eb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
@@ -69,30 +69,15 @@
     }
   }
 
-  private SerDeStats stats;
   private ObjectInspector objInspector;
-
-  private enum LAST_OPERATION {
-    SERIALIZE,
-    DESERIALIZE,
-    UNKNOWN
-  }
-
-  private LAST_OPERATION status;
-  private long serializedSize;
-  private long deserializedSize;
-
   private ParquetHiveRecord parquetRow;
 
   public ParquetHiveSerDe() {
     parquetRow = new ParquetHiveRecord();
-    stats = new SerDeStats();
   }
 
   @Override
   public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
-
-    final TypeInfo rowTypeInfo;
     final List<String> columnNames;
     final List<TypeInfo> columnTypes;
     // Get column names and sort order
@@ -128,19 +113,11 @@ public final void initialize(final Configuration conf, final Properties tbl) thr
       }
     }
     this.objInspector = new ArrayWritableObjectInspector(completeTypeInfo, prunedTypeInfo);
-
-    // Stats part
-    serializedSize = 0;
-    deserializedSize = 0;
-    status = LAST_OPERATION.UNKNOWN;
   }
 
   @Override
   public Object deserialize(final Writable blob) throws SerDeException {
-    status = LAST_OPERATION.DESERIALIZE;
-    deserializedSize = 0;
     if (blob instanceof ArrayWritable) {
-      deserializedSize = ((ArrayWritable) blob).get().length;
       return blob;
     } else {
       return null;
@@ -163,23 +140,21 @@ public Writable serialize(final Object obj, final ObjectInspector objInspector)
     if (!objInspector.getCategory().equals(Category.STRUCT)) {
       throw new SerDeException("Cannot serialize " + objInspector.getCategory() + ". Can only serialize a struct");
     }
-    serializedSize = ((StructObjectInspector)objInspector).getAllStructFieldRefs().size();
-    status = LAST_OPERATION.SERIALIZE;
+
     parquetRow.value = obj;
     parquetRow.inspector= (StructObjectInspector)objInspector;
     return parquetRow;
   }
 
+  /**
+   * Returns null for the Parquet format; stats are collected in
+   * ParquetRecordWriterWrapper when the writer is closed.
+   *
+   * @return null
+   */
   @Override
   public SerDeStats getSerDeStats() {
-    // must be different
-    assert (status != LAST_OPERATION.UNKNOWN);
-    if (status == LAST_OPERATION.SERIALIZE) {
-      stats.setRawDataSize(serializedSize);
-    } else {
-      stats.setRawDataSize(deserializedSize);
-    }
-    return stats;
+    return null;
  }
 
   /**
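Note: with the SerDe out of the stats business, the writer side takes over. ParquetRecordWriterWrapper (next file) implements StatsProvidingRecordWriter, the same hook the ORC writer uses, so a file sink can pull row count and raw data size once the writer has been closed. A minimal sketch of how a caller could poll the stats under that contract — the closeAndCollect helper is hypothetical, only the interface and its getStats() method come from Hive:

  // Hypothetical caller sketch, not part of this patch: stats can only be
  // polled after close(), because that is where the wrapper fills them in.
  import java.io.IOException;
  import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
  import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
  import org.apache.hadoop.hive.serde2.SerDeStats;

  public class StatsPollingSketch {
    static SerDeStats closeAndCollect(FileSinkOperator.RecordWriter writer) throws IOException {
      writer.close(false);  // ParquetRecordWriterWrapper computes its stats inside close()
      if (writer instanceof StatsProvidingRecordWriter) {
        return ((StatsProvidingRecordWriter) writer).getStats();  // may be null if the footer read failed
      }
      return null;  // this writer does not expose stats
    }
  }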
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
index db8a33247f..eee21cd9b2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java
@@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
@@ -32,21 +33,26 @@ import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
 import org.apache.hadoop.util.Progressable;
-
+import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetOutputFormat;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.util.ContextUtil;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
 
-public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, ParquetHiveRecord>,
-    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter {
+public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, ParquetHiveRecord>,
+    StatsProvidingRecordWriter, org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordWriterWrapper.class);
 
   private final org.apache.hadoop.mapreduce.RecordWriter realWriter;
   private final TaskAttemptContext taskContext;
-
+  private final JobConf jobConf;
+  private final Path file;
+  private SerDeStats stats;
 
   public ParquetRecordWriterWrapper(
     final OutputFormat realOutputFormat,
     final JobConf jobConf,
@@ -66,8 +71,12 @@ public ParquetRecordWriterWrapper(
 
       LOG.info("creating real writer to write at " + name);
 
+      this.jobConf = jobConf;
+      this.file = new Path(name);
+
       realWriter =
-          ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
+          ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, this.file);
+
       LOG.info("real writer: " + realWriter);
     } catch (final InterruptedException e) {
       throw new IOException(e);
@@ -128,6 +137,22 @@ public void close(final Reporter reporter) throws IOException {
     } catch (final InterruptedException e) {
       throw new IOException(e);
     }
+
+    // Collect stats for the file we just wrote; only the footer is read, no data pages
+    try (ParquetFileReader reader =
+        ParquetFileReader.open(HadoopInputFile.fromPath(this.file, this.jobConf))) {
+      long totalSize = 0;
+      for (BlockMetaData block : reader.getFooter().getBlocks()) {
+        totalSize += block.getTotalByteSize();
+      }
+
+      stats = new SerDeStats();
+      stats.setRowCount(reader.getRecordCount());
+      stats.setRawDataSize(totalSize);
+    } catch (IOException e) {
+      // Stats are best-effort; leave them unset if the footer cannot be read
+      LOG.warn("Unable to read Parquet footer to collect stats for " + file, e);
+    }
   }
 
   @Override
@@ -149,4 +173,9 @@ public void write(final Writable w) throws IOException {
     write(null, (ParquetHiveRecord) w);
   }
 
+  @Override
+  public SerDeStats getStats() {
+    return stats;
+  }
+
 }
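Note: the new close() logic reads only file metadata — row count and per-row-group uncompressed sizes all live in the Parquet footer, so the extra read at the end of each write is cheap. The same numbers can be reproduced offline. A standalone sketch, assuming parquet-hadoop and hadoop-common on the classpath and a file path as the argument:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.parquet.hadoop.ParquetFileReader;
  import org.apache.parquet.hadoop.metadata.BlockMetaData;
  import org.apache.parquet.hadoop.util.HadoopInputFile;

  public class FooterStatsSketch {
    public static void main(String[] args) throws Exception {
      try (ParquetFileReader reader = ParquetFileReader.open(
          HadoopInputFile.fromPath(new Path(args[0]), new Configuration()))) {
        long totalSize = 0;
        for (BlockMetaData block : reader.getFooter().getBlocks()) {
          totalSize += block.getTotalByteSize();  // uncompressed byte size of one row group
        }
        // These are exactly the two values the wrapper feeds into SerDeStats
        System.out.println("numRows=" + reader.getRecordCount() + " rawDataSize=" + totalSize);
      }
    }
  }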
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
index 06f27b5091..097dbebf6f 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
@@ -115,7 +115,8 @@ public void testParquetHiveSerDeComplexTypes() throws Throwable {
     assertEquals(wb[0], boi.getStructFieldData(awb, b));
   }
 
-  private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {
+  private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t)
+      throws SerDeException {
 
     // Get the row structure
     final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
@@ -123,13 +124,14 @@ private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, fin
     // Deserialize
     final Object row = serDe.deserialize(t);
     assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
-    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
     assertEquals("deserialization gives the wrong object", t, row);
 
     // Serialize
     final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
-    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), ((ArrayWritable)serializedArr.getObject()).get().length);
-    assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, (ArrayWritable)serializedArr.getObject()));
+    assertTrue("serialized object should be equal to starting object",
+        arrayWritableEquals(t, (ArrayWritable)serializedArr.getObject()));
+
+    assertEquals("Stats are not collected during serialization and deserialization", null, serDe.getSerDeStats());
   }
 
   private Properties createProperties() {
diff --git ql/src/test/queries/clientpositive/parquet_stats.q ql/src/test/queries/clientpositive/parquet_stats.q
new file mode 100644
index 0000000000..92eaadb293
--- /dev/null
+++ ql/src/test/queries/clientpositive/parquet_stats.q
@@ -0,0 +1,12 @@
+
+DROP TABLE if exists parquet_stats;
+
+CREATE TABLE parquet_stats (
+  id int,
+  str string
+) STORED AS PARQUET;
+
+SET hive.stats.autogather=true;
+INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1');
+DESC FORMATTED parquet_stats;
+
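Note: the new qfile exercises the path end to end — with hive.stats.autogather=true the INSERT publishes the writer's footer-derived stats to the metastore, and DESC FORMATTED should then show them as numRows/rawDataSize (expected output appears further below in parquet_stats.q.out). If the byte counts drift with a parquet-mr upgrade, the golden file can be refreshed through the usual qtest flow with -Dtest.output.overwrite=true.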
diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 09435b38f9..6b76e266bb 100644
--- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -4059,7 +4059,7 @@ STAGE PLANS:
                 Map Operator Tree:
                     TableScan
                       alias: flights_tiny_parquet_partitioned_date
-                      Statistics: Num rows: 137 Data size: 8357 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE
                       TableScan Vectorization:
                           native: true
                       Select Operator
@@ -4069,7 +4069,7 @@ STAGE PLANS:
                             className: VectorSelectOperator
                             native: true
                             projectedOutputColumnNums: [5]
-                        Statistics: Num rows: 137 Data size: 8357 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           Group By Vectorization:
@@ -5098,7 +5098,7 @@ STAGE PLANS:
                 Map Operator Tree:
                     TableScan
                       alias: flights_tiny_parquet_partitioned_timestamp
-                      Statistics: Num rows: 137 Data size: 6165 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE
                       TableScan Vectorization:
                           native: true
                       Select Operator
@@ -5108,7 +5108,7 @@ STAGE PLANS:
                             className: VectorSelectOperator
                             native: true
                             projectedOutputColumnNums: [5]
-                        Statistics: Num rows: 137 Data size: 6165 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           Group By Vectorization:
diff --git ql/src/test/results/clientpositive/parquet_analyze.q.out ql/src/test/results/clientpositive/parquet_analyze.q.out
index 16c836dd40..f2088fd459 100644
--- ql/src/test/results/clientpositive/parquet_analyze.q.out
+++ ql/src/test/results/clientpositive/parquet_analyze.q.out
@@ -93,7 +93,7 @@ Table Parameters:
 	bucketing_version   	2
 	numFiles            	1
 	numRows             	100
-	rawDataSize         	700
+	rawDataSize         	5936
 	totalSize           	6730
 #### A masked pattern was here ####
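Note: the parquet_analyze numbers show the semantic change directly. The removed SerDe accounting counted one unit per field per row, while the new figure is the footer's uncompressed byte count. Assuming the table behind this test has 7 columns (not stated in this diff, but it is what the old figure implies, since 700 / 100 rows = 7), the before and after work out as:

  old rawDataSize = numRows * numFields              = 100 * 7 = 700
  new rawDataSize = sum(rowGroup.getTotalByteSize()) = 5936  (uncompressed bytes)
  totalSize       = file length on disk (compressed data + footer) = 6730 bytes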
diff --git ql/src/test/results/clientpositive/parquet_stats.q.out ql/src/test/results/clientpositive/parquet_stats.q.out
new file mode 100644
index 0000000000..3bc6554bfd
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_stats.q.out
@@ -0,0 +1,63 @@
+PREHOOK: query: DROP TABLE if exists parquet_stats
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists parquet_stats
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_stats (
+  id int,
+  str string
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_stats
+POSTHOOK: query: CREATE TABLE parquet_stats (
+  id int,
+  str string
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_stats
+PREHOOK: query: INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@parquet_stats
+POSTHOOK: query: INSERT INTO parquet_stats values(0, 'this is string 0'), (1, 'string 1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@parquet_stats
+POSTHOOK: Lineage: parquet_stats.id SCRIPT []
+POSTHOOK: Lineage: parquet_stats.str SCRIPT []
+PREHOOK: query: DESC FORMATTED parquet_stats
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@parquet_stats
+POSTHOOK: query: DESC FORMATTED parquet_stats
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@parquet_stats
+# col_name            	data_type           	comment
+id                  	int
+str                 	string
+
+# Detailed Table Information
+Database:           	default
+#### A masked pattern was here ####
+Retention:          	0
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"str\":\"true\"}}
+	bucketing_version   	2
+	numFiles            	1
+	numRows             	2
+	rawDataSize         	146
+	totalSize           	431
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:      	org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
+InputFormat:        	org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+OutputFormat:       	org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
+Compressed:         	No
+Num Buckets:        	-1
+Bucket Columns:     	[]
+Sort Columns:       	[]
+Storage Desc Params:
+	serialization.format	1
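Note: everything from here on is the same change reflected in optimizer output. With Column stats NONE the planner falls back to the table's rawDataSize for its Data size estimate, so every Statistics line in these plans jumps from the old per-field count to footer bytes. Taking the two-row table p1 from parquet_join.q.out below as a worked example (its two-column shape is inferred from the old figure, not spelled out in this diff):

  old Data size = 2 rows * 2 fields            = 4
  new Data size = uncompressed row-group bytes = 120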
diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out
index 0a092d2c55..b1b1d6c391 100644
--- ql/src/test/results/clientpositive/spark/parquet_join.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_join.q.out
@@ -87,37 +87,37 @@ STAGE PLANS:
                 TableScan
                   alias: p1
                   filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: p2
                   filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), myvalue (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
         Reducer 2 
            Reduce Operator Tree:
@@ -128,14 +128,14 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col2
-                Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col2 (type: string)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -184,14 +184,14 @@ STAGE PLANS:
                 TableScan
                   alias: p1
                   filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
@@ -208,14 +208,14 @@ STAGE PLANS:
                 TableScan
                   alias: p2
                   filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), myvalue (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col2
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col2 (type: string)
                           outputColumnNames: _col0
-                          Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
                           File Output Operator
                             compressed: false
-                            Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
                             table:
                                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -319,14 +319,14 @@ STAGE PLANS:
                 TableScan
                   alias: p2
                   filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value2 (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
                       Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col3
-                        Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col1 (type: string), _col3 (type: string)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
                           File Output Operator
                             compressed: false
-                            Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
                             table:
                                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
index e298dabb2a..c0bfa4d4ba 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
@@ -38,7 +38,7 @@ STAGE PLANS:
                 TableScan
                   alias: date_decimal_test_parquet
                   filterExpr: (cint is not null and cdouble is not null) (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                   Filter Operator
@@ -47,7 +47,7 @@ STAGE PLANS:
                       native: true
                       predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:double))
                     predicate: (cdouble is not null and cint is not null) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdate (type: date), cdecimal (type: decimal(20,10))
                       outputColumnNames: _col0, _col1
@@ -55,19 +55,19 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumnNums: [2, 3]
-                      Statistics: Num rows: 12288 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
                       Limit
                         Number of rows: 10
                         Limit Vectorization:
                             className: VectorLimitOperator
                             native: true
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
                           File Sink Vectorization:
                               className: VectorFileSinkOperator
                               native: false
-                          Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
index 88b97d59bd..ab1de3c972 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
@@ -77,15 +77,15 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesparquet_part_n0
-                  Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: (cdouble + 2.0D) (type: double)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: double)
                       sort order: +
-                      Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
             Map Vectorization:
@@ -109,13 +109,13 @@ STAGE PLANS:
                 Select Operator
                   expressions: KEY.reducesinkkey0 (type: double)
                   outputColumnNames: _col0
-                  Statistics: Num rows: 200 Data size: 2400 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
                   Limit
                     Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 490 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 490 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index 5f06e4e832..d3c55a36a7 100644
--- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -6988,11 +6988,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_parquet
-                  Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2000 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ds (type: string), UDFToDouble(hr) (type: double)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 2000 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2000 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Inner Join 0 to 1
                       keys:
                         0 _col0 (type: string), _col1 (type: double)
                         1 _col0 (type: string), _col1 (type: double)
                       input vertices:
                         1 Map 3
-                      Statistics: Num rows: 2200 Data size: 4400 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2200 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
diff --git ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
index a6e99f8d24..e37cd5d620 100644
--- ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
@@ -78,20 +78,20 @@ STAGE PLANS:
                 TableScan
                   alias: alltypes_parquet_n0
                   filterExpr: (cint = 528534767) (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (cint = 528534767) (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                       Limit
                         Number of rows: 10
-                        Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -178,22 +178,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypes_parquet_n0
-                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
                       keys: _col0 (type: tinyint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
@@ -219,14 +219,14 @@ STAGE PLANS:
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -420,20 +420,20 @@ STAGE PLANS:
                 TableScan
                   alias: alltypes_parquet_n0
                   filterExpr: (cint = 528534767) (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (cint = 528534767) (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                       Limit
                         Number of rows: 10
-                        Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -514,22 +514,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypes_parquet_n0
-                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
                       keys: _col0 (type: tinyint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
             Map Vectorization:
                 enabled: false
@@ -549,14 +549,14 @@ STAGE PLANS:
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -750,20 +750,20 @@ STAGE PLANS:
                 TableScan
                   alias: alltypes_parquet_n0
                   filterExpr: (cint = 528534767) (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (cint = 528534767) (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                       Limit
                         Number of rows: 10
-                        Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -850,22 +850,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypes_parquet_n0
-                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
                       keys: _col0 (type: tinyint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
@@ -891,14 +891,14 @@ STAGE PLANS:
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1565,17 +1565,17 @@ STAGE PLANS:
                 TableScan
                   alias: parquettbl
                   filterExpr: (UDFToInteger((t1 + t2)) > 10) (type: boolean)
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 126 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (UDFToInteger((t1 + t2)) > 10) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t1 (type: tinyint), t2 (type: tinyint), (t1 + t2) (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out
index 44667a54d7..87ab29bf88 100644
--- ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out
@@ -139,14 +139,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: parquet_project_test
-                  Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cint (type: int), m1 (type: map<string,string>)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -221,9 +221,9 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: parquet_project_test
-                  Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       mode: hash
@@ -309,22 +309,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: parquet_project_test
-                  Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cint (type: int)
                     outputColumnNames: cint
-                    Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
                       keys: cint (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
@@ -350,10 +350,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -407,22 +407,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: parquet_project_test
-                  Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: m1['color'] (type: string)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
                       keys: _col0 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: bigint)
             Map Vectorization:
                 enabled: true
@@ -444,10 +444,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 11 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -524,9 +524,9 @@ STAGE PLANS:
                 TableScan
                   alias: parquet_nullsplit
                   filterExpr: (len = '1') (type: boolean)
-                  Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
                       mode: hash
diff --git ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
index b52858d6b5..e141ab73e8 100644
--- ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
+++ ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
@@ -213,9 +213,9 @@ STAGE PLANS:
           Map Operator Tree:
               TableScan
                 alias: parquet_project_test
-                Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  Statistics: Num rows: 22 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count()
                     mode: hash
@@ -489,9 +489,9 @@ STAGE PLANS:
              TableScan
                alias: parquet_nullsplit
                filterExpr: (len = '1') (type: boolean)
-                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash