diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
index d1a75df..7052acf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
@@ -23,6 +23,7 @@
 import java.util.List;
 import java.util.Map;
+import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -30,6 +31,7 @@
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
@@ -37,6 +39,7 @@
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 
 /**
  * This class is used as a static factory for VectorColumnAssign.
@@ -185,10 +188,31 @@ protected void copyValue(BytesColumnVector src, int srcIndex, int destIndex) {
   public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
       int outColIndex, ObjectInspector objInspector) throws HiveException {
     PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+    return buildObjectAssign(outputBatch, outColIndex, poi.getPrimitiveCategory());
+  }
+
+  public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
+      int outColIndex, PrimitiveCategory category) throws HiveException {
     VectorColumnAssign outVCA = null;
     ColumnVector destCol = outputBatch.cols[outColIndex];
-    if (destCol instanceof LongColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+    if (destCol == null) {
+      switch(category) {
+      case VOID:
+        outVCA = new VectorLongColumnAssign() {
+          // This is a dummy assigner
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            // This is a no-op: there is no column to assign to, and val is expected to be null
+            assert (val == null);
+          }
+        };
+        break;
+      default:
+        throw new HiveException("Incompatible (null) vector column and primitive category " +
+            category);
+      }
+    } else if (destCol instanceof LongColumnVector) {
+      switch(category) {
       case BOOLEAN:
         outVCA = new VectorLongColumnAssign() {
           @Override
@@ -276,11 +300,11 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Long vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DoubleColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DOUBLE:
         outVCA = new VectorDoubleColumnAssign() {
           @Override
@@ -311,11 +335,26 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Double vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof BytesColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
+      case BINARY:
+        outVCA = new VectorBytesColumnAssign() {
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            if (val == null) {
+              assignNull(destIndex);
+            }
+            else {
+              BinaryWritable bw = (BinaryWritable) val;
+              byte[] bytes = bw.getBytes();
+              assignBytes(bytes, 0, bytes.length, destIndex);
+            }
+          }
+        }.init(outputBatch, (BytesColumnVector) destCol);
+        break;
       case STRING:
         outVCA = new VectorBytesColumnAssign() {
           @Override
@@ -333,7 +372,7 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Bytes vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else {
@@ -366,4 +405,39 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
     }
     return vcas;
   }
-}
\ No newline at end of file
+
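+  /**
+   * Builds one assigner per batch column from an example row of writables.
+   * Assigners are chosen from the runtime class of each writable rather than
+   * from object-inspector metadata, which lets callers (such as the
+   * vectorized Parquet reader) handle record readers whose runtime writable
+   * types differ from the declared column types.
+   */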
+  public static VectorColumnAssign[] buildAssigners(VectorizedRowBatch outputBatch,
+      Writable[] writables) throws HiveException {
+    VectorColumnAssign[] vcas = new VectorColumnAssign[outputBatch.numCols];
+    for (int i = 0; i < outputBatch.numCols; ++i) {
+      if (writables[i] == null) {
+        assert(outputBatch.cols[i] == null);
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.VOID);
+      } else if (writables[i] instanceof ByteWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BYTE);
+      } else if (writables[i] instanceof ShortWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.SHORT);
+      } else if (writables[i] instanceof IntWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INT);
+      } else if (writables[i] instanceof LongWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.LONG);
+      } else if (writables[i] instanceof FloatWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.FLOAT);
+      } else if (writables[i] instanceof DoubleWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DOUBLE);
+      } else if (writables[i] instanceof Text) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.STRING);
+      } else if (writables[i] instanceof BinaryWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
+      } else if (writables[i] instanceof TimestampWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
+      } else if (writables[i] instanceof BooleanWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN);
+      } else {
+        throw new HiveException("Unimplemented vector assigner for writable type " +
+            writables[i].getClass());
+      }
+    }
+    return vcas;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 0b504de..4364572 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -97,6 +97,13 @@ public VectorizedRowBatch(int numCols, int size) {
   }
 
   /**
+   * Returns the maximum size of the batch (the number of rows it can hold).
+   */
+  public int getMaxSize() {
+    return selected.length;
+  }
+
+  /**
    * Return count of qualifying rows.
    *
    * @return number of rows that have not been filtered out
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index d409d44..6b6d6c0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -402,4 +402,17 @@ private ColumnVector allocateColumnVector(String type, int defaultSize) {
     }
   }
 
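+  /**
+   * Builds one object assigner per batch column, using the field object
+   * inspectors of this context's row object inspector.
+   */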
+  public VectorColumnAssign[] buildObjectAssigners(VectorizedRowBatch outputBatch)
+      throws HiveException {
+    List<? extends StructField> fieldRefs = rowOI.getAllStructFieldRefs();
+    assert outputBatch.numCols == fieldRefs.size();
+    VectorColumnAssign[] assigners = new VectorColumnAssign[fieldRefs.size()];
+    for(int i = 0; i < assigners.length; ++i) {
+      StructField fieldRef = fieldRefs.get(i);
+      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
+      assigners[i] = VectorColumnAssignFactory.buildObjectAssign(
+          outputBatch, i, fieldOI);
+    }
+    return assigners;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
index d3412df..591b01f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
@@ -14,7 +14,10 @@
 package org.apache.hadoop.hive.ql.io.parquet;
 
 import java.io.IOException;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
 import org.apache.hadoop.io.ArrayWritable;
@@ -29,18 +32,25 @@
  * A Parquet InputFormat for Hive (with the deprecated package mapred)
  *
  */
-public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable> {
+public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable>
+  implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(MapredParquetInputFormat.class);
 
   private final ParquetInputFormat<ArrayWritable> realInput;
 
+  private final transient VectorizedParquetInputFormat vectorizedSelf;
+
   public MapredParquetInputFormat() {
     this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
   }
 
   protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> inputFormat) {
     this.realInput = inputFormat;
+    vectorizedSelf = new VectorizedParquetInputFormat(inputFormat);
   }
 
+  @SuppressWarnings({ "unchecked", "rawtypes" })
   @Override
   public org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> getRecordReader(
       final org.apache.hadoop.mapred.InputSplit split,
@@ -48,7 +58,19 @@ protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> input
       final org.apache.hadoop.mapred.Reporter reporter
       ) throws IOException {
     try {
-      return (RecordReader) new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      if (Utilities.isVectorMode(job)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using vectorized record reader");
+        }
+        return (RecordReader) vectorizedSelf.getRecordReader(split, job, reporter);
+      }
+      else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using row-mode record reader");
+        }
+        return (RecordReader)
+            new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      }
     } catch (final InterruptedException e) {
       throw new RuntimeException("Cannot create a RecordReaderWrapper", e);
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
new file mode 100644
index 0000000..30c6f44
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
@@ -0,0 +1,161 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import parquet.hadoop.ParquetInputFormat;
+
+/**
+ * Vectorized input format for Parquet files
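+ *
+ * <p>A rough usage sketch (assuming a {@code JobConf}, a {@code FileSplit} for
+ * a Parquet-backed table, and the read support used by the row-mode reader):
+ *
+ * <pre>
+ *   VectorizedParquetInputFormat inputFormat = new VectorizedParquetInputFormat(
+ *       new ParquetInputFormat&lt;ArrayWritable&gt;(DataWritableReadSupport.class));
+ *   RecordReader&lt;NullWritable, VectorizedRowBatch&gt; reader =
+ *       inputFormat.getRecordReader(split, conf, Reporter.NULL);
+ *   NullWritable key = reader.createKey();
+ *   VectorizedRowBatch batch = reader.createValue();
+ *   while (reader.next(key, batch)) {
+ *     // consume batch.size rows from batch.cols
+ *   }
+ *   reader.close();
+ * </pre>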
e) { + throw new RuntimeException("Error creating a batch", e); + } + return outputBatch; + } + + @Override + public long getPos() throws IOException { + return internalReader.getPos(); + } + + @Override + public void close() throws IOException { + internalReader.close(); + } + + @Override + public float getProgress() throws IOException { + return internalReader.getProgress(); + } + + @Override + public boolean next(NullWritable key, VectorizedRowBatch outputBatch) + throws IOException { + assert(outputBatch.numCols == assigners.length); + outputBatch.reset(); + int maxSize = outputBatch.getMaxSize(); + try { + while (outputBatch.size < maxSize) { + if (false == internalReader.next(internalKey, internalValues)) { + outputBatch.endOfFile = true; + break; + } + Writable[] writables = internalValues.get(); + + if (null == assigners) { + // Normally we'd build the assigners from the rbCtx.rowOI, but with Parquet + // we have a discrepancy between the metadata type (Eg. tinyint -> BYTE) and + // the writable value (IntWritable). see Parquet's ETypeConverter class. + assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables); + } + + for(int i = 0; i < outputBatch.numCols; ++i) { + assigners[i].assignObjectValue(writables[i], outputBatch.size); + } + ++outputBatch.size; + } + } catch (HiveException e) { + throw new RuntimeException(e); + } + return outputBatch.size > 0; + } + } + + private final ParquetInputFormat realInput; + + public VectorizedParquetInputFormat(ParquetInputFormat realInput) { + this.realInput = realInput; + } + + @SuppressWarnings("unchecked") + @Override + public RecordReader getRecordReader( + InputSplit split, JobConf conf, Reporter reporter) throws IOException { + try { + return (RecordReader) + new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter); + } catch (final InterruptedException e) { + throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e); + } + } + +} diff --git ql/src/test/queries/clientpositive/vectorized_parquet.q ql/src/test/queries/clientpositive/vectorized_parquet.q new file mode 100644 index 0000000..5ce1cf0 --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -0,0 +1,44 @@ +create table if not exists alltypes_parquet ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string) stored as parquet; + +insert overwrite table alltypes_parquet + select cint, + ctinyint, + csmallint, + cfloat, + cdouble, + cstring1 + from alltypesorc; + +SET hive.vectorized.execution.enabled=true; + +explain select * + from alltypes_parquet + where cint = 528534767 + limit 10; +select * + from alltypes_parquet + where cint = 528534767 + limit 10; + +explain select ctinyint, + max(cint), + min(csmallint), + count(cstring1), + avg(cfloat), + stddev_pop(cdouble) + from alltypes_parquet + group by ctinyint; +select ctinyint, + max(cint), + min(csmallint), + count(cstring1), + avg(cfloat), + stddev_pop(cdouble) + from alltypes_parquet + group by ctinyint; diff --git ql/src/test/results/clientpositive/vectorized_parquet.q.out ql/src/test/results/clientpositive/vectorized_parquet.q.out new file mode 100644 index 0000000..0bb16c9 --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_parquet.q.out @@ -0,0 +1,356 @@ +PREHOOK: query: create table if not exists alltypes_parquet ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string) stored as parquet +PREHOOK: type: CREATETABLE +POSTHOOK: 
+            assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables);
+          }
+
+          for(int i = 0; i < outputBatch.numCols; ++i) {
+            assigners[i].assignObjectValue(writables[i], outputBatch.size);
+          }
+          ++outputBatch.size;
+        }
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+      return outputBatch.size > 0;
+    }
+  }
+
+  private final ParquetInputFormat<ArrayWritable> realInput;
+
+  public VectorizedParquetInputFormat(ParquetInputFormat<ArrayWritable> realInput) {
+    this.realInput = realInput;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
+      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
+    try {
+      return (RecordReader)
+          new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter);
+    } catch (final InterruptedException e) {
+      throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e);
+    }
+  }
+
+}
diff --git ql/src/test/queries/clientpositive/vectorized_parquet.q ql/src/test/queries/clientpositive/vectorized_parquet.q
new file mode 100644
index 0000000..5ce1cf0
--- /dev/null
+++ ql/src/test/queries/clientpositive/vectorized_parquet.q
@@ -0,0 +1,44 @@
+create table if not exists alltypes_parquet (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string) stored as parquet;
+
+insert overwrite table alltypes_parquet
+  select cint,
+    ctinyint,
+    csmallint,
+    cfloat,
+    cdouble,
+    cstring1
+  from alltypesorc;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10;
+select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10;
+
+explain select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;
+select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;
diff --git ql/src/test/results/clientpositive/vectorized_parquet.q.out ql/src/test/results/clientpositive/vectorized_parquet.q.out
new file mode 100644
index 0000000..0bb16c9
--- /dev/null
+++ ql/src/test/results/clientpositive/vectorized_parquet.q.out
@@ -0,0 +1,356 @@
+PREHOOK: query: create table if not exists alltypes_parquet (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string) stored as parquet
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table if not exists alltypes_parquet (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@alltypes_parquet
+PREHOOK: query: insert overwrite table alltypes_parquet
+  select cint,
+    ctinyint,
+    csmallint,
+    cfloat,
+    cdouble,
+    cstring1
+  from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypes_parquet
+POSTHOOK: query: insert overwrite table alltypes_parquet
+  select cint,
+    ctinyint,
+    csmallint,
+    cfloat,
+    cdouble,
+    cstring1
+  from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypes_parquet
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+PREHOOK: query: explain select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 2072 Data size: 257046 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint = 528534767) (type: boolean)
+              Statistics: Num rows: 1036 Data size: 128523 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1036 Data size: 128523 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 1240 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 1240 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+528534767	-50	-13326	-50.0	-13326.0	cvLH6Eat2yFsyy7p
+528534767	NULL	-4213	NULL	-4213.0	cvLH6Eat2yFsyy7p
+528534767	-28	-15813	-28.0	-15813.0	cvLH6Eat2yFsyy7p
+528534767	31	-9566	31.0	-9566.0	cvLH6Eat2yFsyy7p
+528534767	-34	15007	-34.0	15007.0	cvLH6Eat2yFsyy7p
+528534767	29	7021	29.0	7021.0	cvLH6Eat2yFsyy7p
+528534767	31	4963	31.0	4963.0	cvLH6Eat2yFsyy7p
+528534767	27	-7824	27.0	-7824.0	cvLH6Eat2yFsyy7p
+528534767	-11	-15431	-11.0	-15431.0	cvLH6Eat2yFsyy7p
+528534767	61	-15549	61.0	-15549.0	cvLH6Eat2yFsyy7p
+PREHOOK: query: explain select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 2072 Data size: 257046 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+              outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+              Statistics: Num rows: 2072 Data size: 257046 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+                keys: ctinyint (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 2072 Data size: 257046 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 2072 Data size: 257046 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1036 Data size: 128523 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Statistics: Num rows: 1036 Data size: 128523 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1036 Data size: 128523 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+NULL	1073418988	-16379	3115	NULL	305051.4870777435
+-64	626923679	-15920	21	-64.0	9254.456539277186
+-63	626923679	-12516	16	-63.0	9263.605837223322
+-62	626923679	-15992	24	-62.0	9004.593091474135
+-61	626923679	-15142	22	-61.0	9357.236187870849
+-60	626923679	-15792	24	-60.0	9892.656196775464
+-59	626923679	-15789	28	-59.0	9829.790704244733
+-58	626923679	-15169	20	-58.0	9549.096672008198
+-57	626923679	-14893	32	-57.0	8572.083461570477
+-56	626923679	-11999	33	-56.0	9490.842152672341
+-55	626923679	-13381	26	-55.0	9157.562103946742
+-54	626923679	-14815	23	-54.0	9614.154026896626
+-53	626923679	-15445	19	-53.0	9387.739325499799
+-52	626923679	-16369	30	-52.0	8625.06871423408
+-51	1073680599	-15734	1028	-51.0	9531.569305177045
+-50	626923679	-14320	27	-50.0	8548.827748002343
+-49	626923679	-14831	23	-49.0	9894.429191738676
+-48	626923679	-15462	26	-48.0	9913.883371354861
+-47	626923679	-16096	19	-47.0	9011.009178780589
+-46	626923679	-12427	21	-46.0	9182.943188188632
+-45	626923679	-15027	21	-45.0	8567.489593562543
+-44	626923679	-15667	21	-44.0	10334.01810499552
+-43	626923679	-15607	27	-43.0	8715.255026265124
+-42	626923679	-16025	14	-42.0	9692.646755759979
+-41	626923679	-12606	21	-41.0	9034.40949481481
+-40	626923679	-14678	23	-40.0	9883.334986561835
+-39	626923679	-15612	19	-39.0	9765.551806305297
+-38	626923679	-14914	28	-38.0	8767.375358291503
+-37	626923679	-14780	17	-37.0	10368.905538788269
+-36	626923679	-16208	23	-36.0	8773.547684436919
+-35	626923679	-16059	23	-35.0	10136.580492864763
+-34	626923679	-15450	29	-34.0	8708.243526705026
+-33	626923679	-12779	21	-33.0	8854.331159704514
+-32	626923679	-15866	25	-32.0	9535.546396775915
+-31	626923679	-15915	22	-31.0	9187.596784112568
+-30	626923679	-14863	23	-30.0	9193.941914019653
+-29	626923679	-14747	26	-29.0	9052.945656011721
+-28	626923679	-15813	20	-28.0	9616.869413270924
+-27	626923679	-14984	20	-27.0	8465.29660255097
+-26	626923679	-15686	15	-26.0	10874.523900405318
+-25	626923679	-15862	24	-25.0	9778.256724727018
+-24	626923679	-16311	26	-24.0	9386.736402961187
+-23	626923679	-16355	36	-23.345263230173213	9401.831290253447
+-22	626923679	-14701	22	-22.0	8809.230165774987
+-21	626923679	-16017	27	-21.0	9480.349236669877
+-20	626923679	-16126	24	-20.0	9868.92268080106
+-19	626923679	-15935	25	-19.0	9967.22240685782
+-18	626923679	-14863	24	-18.0	9638.430684071413
+-17	626923679	-15922	19	-17.0	9944.104273894172
+-16	626923679	-15154	21	-16.0	8884.207393686478
+-15	626923679	-16036	24	-15.0	9450.506254395024
+-14	626923679	-13884	22	-14.0	10125.818731386042
+-13	626923679	-15446	30	-13.0	8907.942987576693
+-12	626923679	-16373	22	-12.0	10173.15707541171
+-11	626923679	-15659	32	-11.0	10453.738567408038
+-10	626923679	-15384	28	-10.0	8850.451610567823
+-9	626923679	-15329	31	-9.0	8999.391457373968
+-8	626923679	-14678	18	-8.0	9976.831992670684
+-7	626923679	-14584	23	-7.0	9946.605446407746
+-6	626923679	-15980	30	-6.0	10262.829252317424
+-5	626923679	-15780	24	-5.0	10599.227726422314
+-4	626923679	-16207	21	-4.0	9682.726604102581
+-3	626923679	-13632	16	-3.0	8836.215573422822
+-2	626923679	-16277	20	-2.0	10800.090249507177
+-1	626923679	-15441	36	-1.0486250072717667	8786.246963933321
+0	626923679	-14254	24	0.0	10057.5018088718
+1	626923679	-14610	30	1.0	10016.486277900643
+2	626923679	-16227	25	2.0	10083.276127543355
+3	626923679	-16339	30	3.0	10483.526375885149
+4	626923679	-15999	29	4.0	9516.189702058042
+5	626923679	-16169	31	5.0	11114.001902469323
+6	626923679	-15948	30	6.0	9644.247255286113
+7	626923679	-15839	25	7.0	10077.151640330823
+8	1070764888	-15778	1034	8.0	9562.355155774725
+9	626923679	-13629	25	9.0	10157.217948808622
+10	626923679	-15887	26	10.0	9104.820520135108
+11	1072654057	-14696	1035	11.0	9531.018991371746
+12	626923679	-14642	18	12.0	9696.038286378725
+13	626923679	-14771	26	13.0	8128.265919972384
+14	626923679	-13367	28	14.0	9074.674998750581
+15	626923679	-16339	28	15.0	9770.473400901916
+16	626923679	-14001	26	16.0	10130.883606275334
+17	626923679	-16109	22	16.73235294865627	1353416.3383574807
+18	626923679	-15779	21	18.0	10820.004053788869
+19	626923679	-16049	21	19.0	9423.560227007669
+20	626923679	-15149	21	20.0	11161.893298093504
+21	626923679	-15931	23	21.0	9683.044864861204
+22	626923679	-16280	26	22.0	9693.155720861765
+23	626923679	-15514	24	23.0	8542.419116415425
+24	626923679	-15086	24	24.0	9661.203790645088
+25	626923679	-11349	23	25.0	8888.959012093468
+26	626923679	-14516	29	26.0	9123.125508880432
+27	626923679	-14965	24	27.0	9802.871860196345
+28	626923679	-14455	20	28.0	9283.289383115296
+29	626923679	-15892	16	29.0	9874.046501817154
+30	626923679	-14111	27	30.0	10066.520234676527
+31	626923679	-15960	24	31.0	10427.970184550613
+32	626923679	-14044	24	32.0	8376.464579403413
+33	626923679	-14642	29	40.61776386607777	1304429.5939037625
+34	626923679	-15059	28	34.0	8756.731536033676
+35	626923679	-16153	27	35.0	10351.008404963042
+36	626923679	-15912	20	36.0	9475.257975138164
+37	626923679	-12081	24	37.0	9017.860034890362
+38	626923679	-15248	29	38.0	9900.256257785535
+39	626923679	-14887	28	39.0	10513.343644635232
+40	626923679	-15861	22	40.0	9283.318678549174
+41	626923679	-13480	21	41.0	9016.291129937847
+42	626923679	-15834	28	42.0	10318.01399719996
+43	626923679	-15703	28	43.0	8757.796089055722
+44	626923679	-11185	16	44.0	9425.076634933797
+45	626923679	-15228	18	45.0	9459.968668643689
+46	626923679	-15187	22	46.0	9685.908173160062
+47	626923679	-16324	22	47.0	9822.220821743611
+48	626923679	-16372	29	48.0	10079.286173063345
+49	626923679	-15923	27	49.0	9850.111848934683
+50	626923679	-16236	21	50.0	9398.176197406601
+51	626923679	-15790	17	51.0	9220.075799194028
+52	626923679	-15450	20	52.0	9261.723648435052
+53	626923679	-16217	30	53.0	9895.247408969733
+54	626923679	-15245	16	54.0	9789.50878424882
+55	626923679	-15887	21	55.0	9826.38569192808
+56	626923679	-12631	21	56.0	8860.917133763547
+57	626923679	-15620	25	57.0	9413.99393840875
+58	626923679	-13627	20	58.0	9083.529665947459
+59	626923679	-16076	17	59.0	10117.44967077967
+60	626923679	-13606	23	60.0	8346.267436552042
+61	626923679	-15894	29	61.0	8785.714950987198
+62	626923679	-14307	17	62.0	9491.752726667326