diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
index 696b421..710d20c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -35,6 +36,7 @@
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
@@ -42,6 +44,7 @@
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 
 /**
  * This class is used as a static factory for VectorColumnAssign.
@@ -215,10 +218,31 @@ protected void copyValue(DecimalColumnVector src, int srcIndex, int destIndex) {
   public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
       int outColIndex, ObjectInspector objInspector) throws HiveException {
     PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+    return buildObjectAssign(outputBatch, outColIndex, poi.getPrimitiveCategory());
+  }
+
+  public static VectorColumnAssign buildObjectAssign(VectorizedRowBatch outputBatch,
+      int outColIndex, PrimitiveCategory category) throws HiveException {
     VectorColumnAssign outVCA = null;
     ColumnVector destCol = outputBatch.cols[outColIndex];
-    if (destCol instanceof LongColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+    if (destCol == null) {
+      switch(category) {
+      case VOID:
+        outVCA = new VectorLongColumnAssign() {
+          // This is a dummy assigner
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            // This is a no-op: there is no column to assign to and val is expected to be null
+            assert (val == null);
+          }
+        };
+        break;
+      default:
+        throw new HiveException("Incompatible (null) vector column and primitive category " +
+            category);
+      }
+    } else if (destCol instanceof LongColumnVector) {
+      switch(category) {
       case BOOLEAN:
         outVCA = new VectorLongColumnAssign() {
           @Override
@@ -320,11 +344,11 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Long vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DoubleColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DOUBLE:
         outVCA = new VectorDoubleColumnAssign() {
           @Override
@@ -355,11 +379,26 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Double vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof BytesColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
+      case BINARY:
+        outVCA = new VectorBytesColumnAssign() {
+          @Override
+          public void assignObjectValue(Object val, int destIndex) throws HiveException {
+            if (val == null) {
+              assignNull(destIndex);
+            }
+            else {
+              BinaryWritable bw = (BinaryWritable) val;
+              byte[] bytes = bw.getBytes();
+              assignBytes(bytes, 0, bytes.length, destIndex);
+            }
+          }
+        }.init(outputBatch, (BytesColumnVector) destCol);
+        break;
       case STRING:
         outVCA = new VectorBytesColumnAssign() {
           @Override
@@ -377,11 +416,11 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Bytes vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else if (destCol instanceof DecimalColumnVector) {
-      switch(poi.getPrimitiveCategory()) {
+      switch(category) {
       case DECIMAL:
         outVCA = new VectorDecimalColumnAssign() {
           @Override
@@ -398,7 +437,7 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         break;
       default:
         throw new HiveException("Incompatible Decimal vector column and primitive category " +
-            poi.getPrimitiveCategory());
+            category);
       }
     }
     else {
@@ -431,4 +470,39 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
     }
     return vcas;
   }
-}
\ No newline at end of file
+
+  public static VectorColumnAssign[] buildAssigners(VectorizedRowBatch outputBatch,
+      Writable[] writables) throws HiveException {
+    VectorColumnAssign[] vcas = new VectorColumnAssign[outputBatch.numCols];
+    for (int i = 0; i < outputBatch.numCols; ++i) {
+      if (writables[i] == null) {
+        assert(outputBatch.cols[i] == null);
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.VOID);
+      } else if (writables[i] instanceof ByteWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BYTE);
+      } else if (writables[i] instanceof ShortWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.SHORT);
+      } else if (writables[i] instanceof IntWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INT);
+      } else if (writables[i] instanceof LongWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.LONG);
+      } else if (writables[i] instanceof FloatWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.FLOAT);
+      } else if (writables[i] instanceof DoubleWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DOUBLE);
+      } else if (writables[i] instanceof Text) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.STRING);
+      } else if (writables[i] instanceof BinaryWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BINARY);
+      } else if (writables[i] instanceof TimestampWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.TIMESTAMP);
+      } else if (writables[i] instanceof BooleanWritable) {
+        vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN);
+      } else {
+        throw new HiveException("Unimplemented vector assigner for writable type " +
+            writables[i].getClass());
+      }
+    }
+    return vcas;
+  }
+}
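A minimal sketch of how the new buildAssigners() overload is meant to be driven. The batch layout and test values below are invented for illustration; VectorizedRowBatch, LongColumnVector and BytesColumnVector are the real Hive classes used by this patch:

    import org.apache.hadoop.hive.ql.exec.vector.*;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class AssignersSketch {
      public static void main(String[] args) throws HiveException {
        // Two-column batch: an int column (long-backed) and a string column (bytes-backed).
        VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE);
        batch.cols[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        batch.cols[1] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);

        // One writable per column, as the row-mode Parquet reader would produce them.
        Writable[] row = { new IntWritable(42), new Text("hello") };

        // The assigners are derived from the concrete Writable classes, not from
        // table metadata, which is exactly what the vectorized Parquet reader relies on.
        VectorColumnAssign[] assigners = VectorColumnAssignFactory.buildAssigners(batch, row);
        for (int i = 0; i < batch.numCols; ++i) {
          assigners[i].assignObjectValue(row[i], batch.size);
        }
        ++batch.size;
      }
    }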
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 0b504de..4364572 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -97,6 +97,13 @@ public VectorizedRowBatch(int numCols, int size) {
   }
 
   /**
+   * Returns the maximum size of the batch (number of rows it can hold).
+   */
+  public int getMaxSize() {
+    return selected.length;
+  }
+
+  /**
    * Return count of qualifying rows.
    *
    * @return number of rows that have not been filtered out
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index e1f3c3d..c94cb4e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -582,4 +582,17 @@ private ColumnVector allocateColumnVector(String type, int defaultSize) {
     }
   }
 
+  public VectorColumnAssign[] buildObjectAssigners(VectorizedRowBatch outputBatch)
+      throws HiveException {
+    List<? extends StructField> fieldRefs = rowOI.getAllStructFieldRefs();
+    assert outputBatch.numCols == fieldRefs.size();
+    VectorColumnAssign[] assigners = new VectorColumnAssign[fieldRefs.size()];
+    for(int i = 0; i < assigners.length; ++i) {
+      StructField fieldRef = fieldRefs.get(i);
+      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
+      assigners[i] = VectorColumnAssignFactory.buildObjectAssign(
+          outputBatch, i, fieldOI);
+    }
+    return assigners;
+  }
 }
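For consumers, getMaxSize() bounds how many rows a producer may append before the batch must be handed off, and a downstream operator walks the rows through the selected[] indirection. A minimal consumer sketch, with the column layout assumed for illustration:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class BatchConsumerSketch {
      // Assumes column 0 is long-backed (tinyint/int/bigint all are); illustrative only.
      static void consume(VectorizedRowBatch batch) {
        LongColumnVector col0 = (LongColumnVector) batch.cols[0];
        for (int j = 0; j < batch.size; ++j) {
          // selected[] holds the surviving row indices once a filter has run.
          int row = batch.selectedInUse ? batch.selected[j] : j;
          if (!col0.noNulls && col0.isNull[row]) {
            continue; // null cell
          }
          long value = col0.vector[row];
          // ... process value ...
        }
      }
    }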
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
index d3412df..51b5082 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
@@ -14,7 +14,10 @@
 package org.apache.hadoop.hive.ql.io.parquet;
 
 import java.io.IOException;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
 import org.apache.hadoop.io.ArrayWritable;
@@ -29,18 +32,25 @@
  * A Parquet InputFormat for Hive (with the deprecated package mapred)
  *
  */
-public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable> {
+public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable>
+  implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(MapredParquetInputFormat.class);
 
   private final ParquetInputFormat<ArrayWritable> realInput;
 
+  private final transient VectorizedParquetInputFormat vectorizedSelf;
+
   public MapredParquetInputFormat() {
     this(new ParquetInputFormat<ArrayWritable>(DataWritableReadSupport.class));
   }
 
   protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> inputFormat) {
     this.realInput = inputFormat;
+    vectorizedSelf = new VectorizedParquetInputFormat(inputFormat);
   }
 
+  @SuppressWarnings({ "unchecked", "rawtypes" })
   @Override
   public org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> getRecordReader(
@@ -48,7 +58,19 @@ protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> input
       final org.apache.hadoop.mapred.Reporter reporter
       ) throws IOException {
     try {
-      return (RecordReader) new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      if (Utilities.isVectorMode(job)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using vectorized record reader");
+        }
+        return (RecordReader) vectorizedSelf.getRecordReader(split, job, reporter);
+      }
+      else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Using row-mode record reader");
+        }
+        return (RecordReader)
+            new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+      }
     } catch (final InterruptedException e) {
       throw new RuntimeException("Cannot create a RecordReaderWrapper", e);
     }
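Whichever branch Utilities.isVectorMode(job) selects, the caller sees the standard mapred RecordReader contract. A hedged sketch of the consumption loop for the vectorized case, with the reader assumed to come from getRecordReader() above:

    import java.io.IOException;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapred.RecordReader;

    public class ReaderLoopSketch {
      static long countRows(RecordReader<NullWritable, VectorizedRowBatch> reader)
          throws IOException {
        NullWritable key = reader.createKey();
        VectorizedRowBatch batch = reader.createValue();
        long rows = 0;
        while (reader.next(key, batch)) {
          rows += batch.size; // each next() delivers up to batch.getMaxSize() rows
        }
        reader.close();
        return rows;
      }
    }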
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
new file mode 100644
index 0000000..ca71bbf
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import parquet.hadoop.ParquetInputFormat;
+
+/**
+ * Vectorized input format for Parquet files
+ */
+public class VectorizedParquetInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
+  implements VectorizedInputFormatInterface {
+
+  private static final Log LOG = LogFactory.getLog(VectorizedParquetInputFormat.class);
+
+  /**
+   * Vectorized record reader for vectorized Parquet input format
+   */
+  private static class VectorizedParquetRecordReader implements
+      RecordReader<NullWritable, VectorizedRowBatch> {
+    private static final Log LOG = LogFactory.getLog(VectorizedParquetRecordReader.class);
+
+    private final ParquetRecordReaderWrapper internalReader;
+    private VectorizedRowBatchCtx rbCtx;
+    private ArrayWritable internalValues;
+    private Void internalKey;
+    private VectorColumnAssign[] assigners;
+
+    public VectorizedParquetRecordReader(
+        ParquetInputFormat<ArrayWritable> realInput,
+        FileSplit split,
+        JobConf conf, Reporter reporter) throws IOException, InterruptedException {
+      internalReader = new ParquetRecordReaderWrapper(
+          realInput,
+          split,
+          conf,
+          reporter);
+      try {
+        rbCtx = new VectorizedRowBatchCtx();
+        rbCtx.init(conf, split);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public NullWritable createKey() {
+      internalKey = internalReader.createKey();
+      return NullWritable.get();
+    }
+
+    @Override
+    public VectorizedRowBatch createValue() {
+      VectorizedRowBatch outputBatch = null;
+      try {
+        outputBatch = rbCtx.createVectorizedRowBatch();
+        internalValues = internalReader.createValue();
+      } catch (HiveException e) {
+        throw new RuntimeException("Error creating a batch", e);
+      }
+      return outputBatch;
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      return internalReader.getPos();
+    }
+
+    @Override
+    public void close() throws IOException {
+      internalReader.close();
+    }
+
+    @Override
+    public float getProgress() throws IOException {
+      return internalReader.getProgress();
+    }
+
+    @Override
+    public boolean next(NullWritable key, VectorizedRowBatch outputBatch)
+        throws IOException {
+      assert(assigners == null || outputBatch.numCols == assigners.length); // null until the first row arrives
+      outputBatch.reset();
+      int maxSize = outputBatch.getMaxSize();
+      try {
+        while (outputBatch.size < maxSize) {
+          if (!internalReader.next(internalKey, internalValues)) {
+            outputBatch.endOfFile = true;
+            break;
+          }
+          Writable[] writables = internalValues.get();
+
+          if (null == assigners) {
+            // Normally we would build the assigners from rbCtx.rowOI, but with Parquet
+            // we have a discrepancy between the metadata type (e.g. tinyint -> BYTE) and
+            // the writable value (IntWritable). See Parquet's ETypeConverter class.
+            assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables);
+          }
+
+          for(int i = 0; i < outputBatch.numCols; ++i) {
+            assigners[i].assignObjectValue(writables[i], outputBatch.size);
+          }
+          ++outputBatch.size;
+        }
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+      return outputBatch.size > 0;
+    }
+  }
+
+  private final ParquetInputFormat<ArrayWritable> realInput;
+
+  public VectorizedParquetInputFormat(ParquetInputFormat<ArrayWritable> realInput) {
+    this.realInput = realInput;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
+      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
+    try {
+      return (RecordReader)
+          new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter);
+    } catch (final InterruptedException e) {
+      throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e);
+    }
+  }
+
+}
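The comment in next() is the crux of this file: the assigners cannot be built from rbCtx.rowOI because Parquet's ETypeConverter hands a tinyint column back as an IntWritable. A small sketch of that mismatch, with the value invented for illustration:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
    import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Writable;

    public class TinyintMismatchSketch {
      public static void main(String[] args) throws HiveException {
        // One tinyint column. Metadata says PrimitiveCategory.BYTE, but the Parquet
        // converter materializes the cell as an IntWritable, so the assigner must be
        // chosen from the runtime Writable class rather than from the ObjectInspector.
        VectorizedRowBatch batch = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE);
        batch.cols[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        Writable[] row = { new IntWritable(7) }; // invented value
        VectorColumnAssign[] assigners = VectorColumnAssignFactory.buildAssigners(batch, row);
        assigners[0].assignObjectValue(row[0], batch.size++);
      }
    }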
diff --git ql/src/test/queries/clientpositive/vectorization_part.q ql/src/test/queries/clientpositive/vectorization_part.q
index 0e34585..b3f8708 100644
--- ql/src/test/queries/clientpositive/vectorization_part.q
+++ ql/src/test/queries/clientpositive/vectorization_part.q
@@ -1,4 +1,3 @@
-SET hive.vectorized.execution.enabled=true;
 CREATE TABLE alltypesorc_part(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds string) STORED AS ORC;
 insert overwrite table alltypesorc_part partition (ds='2011') select * from alltypesorc limit 100;
 insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc limit 100;
diff --git ql/src/test/queries/clientpositive/vectorized_parquet.q ql/src/test/queries/clientpositive/vectorized_parquet.q
new file mode 100644
index 0000000..5ce1cf0
--- /dev/null
+++ ql/src/test/queries/clientpositive/vectorized_parquet.q
@@ -0,0 +1,44 @@
+create table if not exists alltypes_parquet (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string) stored as parquet;
+
+insert overwrite table alltypes_parquet
+  select cint,
+    ctinyint,
+    csmallint,
+    cfloat,
+    cdouble,
+    cstring1
+  from alltypesorc;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10;
+select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10;
+
+explain select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;
+select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint;
diff --git ql/src/test/results/clientpositive/vectorized_parquet.q.out ql/src/test/results/clientpositive/vectorized_parquet.q.out
new file mode 100644
index 0000000..6882c8f
--- /dev/null
+++ ql/src/test/results/clientpositive/vectorized_parquet.q.out
@@ -0,0 +1,358 @@
+PREHOOK: query: create table if not exists alltypes_parquet (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table if not exists alltypes_parquet (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypes_parquet
+PREHOOK: query: insert overwrite table alltypes_parquet
+  select cint,
+    ctinyint,
+    csmallint,
+    cfloat,
+    cdouble,
+    cstring1
+  from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypes_parquet
+POSTHOOK: query: insert overwrite table alltypes_parquet
+  select cint,
+    ctinyint,
+    csmallint,
+    cfloat,
+    cdouble,
+    cstring1
+  from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypes_parquet
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: explain select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint = 528534767) (type: boolean)
+              Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+  from alltypes_parquet
+  where cint = 528534767
+  limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+528534767	-50	-13326	-50.0	-13326.0	cvLH6Eat2yFsyy7p
+528534767	NULL	-4213	NULL	-4213.0	cvLH6Eat2yFsyy7p
+528534767	-28	-15813	-28.0	-15813.0	cvLH6Eat2yFsyy7p
+528534767	31	-9566	31.0	-9566.0	cvLH6Eat2yFsyy7p
+528534767	-34	15007	-34.0	15007.0	cvLH6Eat2yFsyy7p
+528534767	29	7021	29.0	7021.0	cvLH6Eat2yFsyy7p
+528534767	31	4963	31.0	4963.0	cvLH6Eat2yFsyy7p
+528534767	27	-7824	27.0	-7824.0	cvLH6Eat2yFsyy7p
+528534767	-11	-15431	-11.0	-15431.0	cvLH6Eat2yFsyy7p
+528534767	61	-15549	61.0	-15549.0	cvLH6Eat2yFsyy7p
+PREHOOK: query: explain select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypes_parquet
+            Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+              outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+              Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+                keys: ctinyint (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,
+  max(cint),
+  min(csmallint),
+  count(cstring1),
+  avg(cfloat),
+  stddev_pop(cdouble)
+  from alltypes_parquet
+  group by ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypes_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypes_parquet.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypes_parquet.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+NULL	1073418988	-16379	3115	NULL	305051.4870777435
+-64	626923679	-15920	21	-64.0	9254.456539277186
+-63	626923679	-12516	16	-63.0	9263.605837223322
+-62	626923679	-15992	24	-62.0	9004.593091474135
+-61	626923679	-15142	22	-61.0	9357.236187870849
+-60	626923679	-15792	24	-60.0	9892.656196775464
+-59	626923679	-15789	28	-59.0	9829.790704244733
+-58	626923679	-15169	20	-58.0	9549.096672008198
+-57	626923679	-14893	32	-57.0	8572.083461570477
+-56	626923679	-11999	33	-56.0	9490.842152672341
+-55	626923679	-13381	26	-55.0	9157.562103946742
+-54	626923679	-14815	23	-54.0	9614.154026896626
+-53	626923679	-15445	19	-53.0	9387.739325499799
+-52	626923679	-16369	30	-52.0	8625.06871423408
+-51	1073680599	-15734	1028	-51.0	9531.569305177045
+-50	626923679	-14320	27	-50.0	8548.827748002343
+-49	626923679	-14831	23	-49.0	9894.429191738676
+-48	626923679	-15462	26	-48.0	9913.883371354861
+-47	626923679	-16096	19	-47.0	9011.009178780589
+-46	626923679	-12427	21	-46.0	9182.943188188632
+-45	626923679	-15027	21	-45.0	8567.489593562543
+-44	626923679	-15667	21	-44.0	10334.01810499552
+-43	626923679	-15607	27	-43.0	8715.255026265124
+-42	626923679	-16025	14	-42.0	9692.646755759979
+-41	626923679	-12606	21	-41.0	9034.40949481481
+-40	626923679	-14678	23	-40.0	9883.334986561835
+-39	626923679	-15612	19	-39.0	9765.551806305297
+-38	626923679	-14914	28	-38.0	8767.375358291503
+-37	626923679	-14780	17	-37.0	10368.905538788269
+-36	626923679	-16208	23	-36.0	8773.547684436919
+-35	626923679	-16059	23	-35.0	10136.580492864763
+-34	626923679	-15450	29	-34.0	8708.243526705026
+-33	626923679	-12779	21	-33.0	8854.331159704514
+-32	626923679	-15866	25	-32.0	9535.546396775915
+-31	626923679	-15915	22	-31.0	9187.596784112568
+-30	626923679	-14863	23	-30.0	9193.941914019653
+-29	626923679	-14747	26	-29.0	9052.945656011721
+-28	626923679	-15813	20	-28.0	9616.869413270924
+-27	626923679	-14984	20	-27.0	8465.29660255097
+-26	626923679	-15686	15	-26.0	10874.523900405318
+-25	626923679	-15862	24	-25.0	9778.256724727018
+-24	626923679	-16311	26	-24.0	9386.736402961187
+-23	626923679	-16355	36	-23.345263230173213	9401.831290253447
+-22	626923679	-14701	22	-22.0	8809.230165774987
+-21	626923679	-16017	27	-21.0	9480.349236669877
+-20	626923679	-16126	24	-20.0	9868.92268080106
+-19	626923679	-15935	25	-19.0	9967.22240685782
+-18	626923679	-14863	24	-18.0	9638.430684071413
+-17	626923679	-15922	19	-17.0	9944.104273894172
+-16	626923679	-15154	21	-16.0	8884.207393686478
+-15	626923679	-16036	24	-15.0	9450.506254395024
+-14	626923679	-13884	22	-14.0	10125.818731386042
+-13	626923679	-15446	30	-13.0	8907.942987576693
+-12	626923679	-16373	22	-12.0	10173.15707541171
+-11	626923679	-15659	32	-11.0	10453.738567408038
+-10	626923679	-15384	28	-10.0	8850.451610567823
+-9	626923679	-15329	31	-9.0	8999.391457373968
+-8	626923679	-14678	18	-8.0	9976.831992670684
+-7	626923679	-14584	23	-7.0	9946.605446407746
+-6	626923679	-15980	30	-6.0	10262.829252317424
+-5	626923679	-15780	24	-5.0	10599.227726422314
+-4	626923679	-16207	21	-4.0	9682.726604102581
+-3	626923679	-13632	16	-3.0	8836.215573422822
+-2	626923679	-16277	20	-2.0	10800.090249507177
+-1	626923679	-15441	36	-1.0486250072717667	8786.246963933321
+0	626923679	-14254	24	0.0	10057.5018088718
+1	626923679	-14610	30	1.0	10016.486277900643
+2	626923679	-16227	25	2.0	10083.276127543355
+3	626923679	-16339	30	3.0	10483.526375885149
+4	626923679	-15999	29	4.0	9516.189702058042
+5	626923679	-16169	31	5.0	11114.001902469323
+6	626923679	-15948	30	6.0	9644.247255286113
+7	626923679	-15839	25	7.0	10077.151640330823
+8	1070764888	-15778	1034	8.0	9562.355155774725
+9	626923679	-13629	25	9.0	10157.217948808622
+10	626923679	-15887	26	10.0	9104.820520135108
+11	1072654057	-14696	1035	11.0	9531.018991371746
+12	626923679	-14642	18	12.0	9696.038286378725
+13	626923679	-14771	26	13.0	8128.265919972384
+14	626923679	-13367	28	14.0	9074.674998750581
+15	626923679	-16339	28	15.0	9770.473400901916
+16	626923679	-14001	26	16.0	10130.883606275334
+17	626923679	-16109	22	16.73235294865627	1353416.3383574807
+18	626923679	-15779	21	18.0	10820.004053788869
+19	626923679	-16049	21	19.0	9423.560227007669
+20	626923679	-15149	21	20.0	11161.893298093504
+21	626923679	-15931	23	21.0	9683.044864861204
+22	626923679	-16280	26	22.0	9693.155720861765
+23	626923679	-15514	24	23.0	8542.419116415425
+24	626923679	-15086	24	24.0	9661.203790645088
+25	626923679	-11349	23	25.0	8888.959012093468
+26	626923679	-14516	29	26.0	9123.125508880432
+27	626923679	-14965	24	27.0	9802.871860196345
+28	626923679	-14455	20	28.0	9283.289383115296
+29	626923679	-15892	16	29.0	9874.046501817154
+30	626923679	-14111	27	30.0	10066.520234676527
+31	626923679	-15960	24	31.0	10427.970184550613
+32	626923679	-14044	24	32.0	8376.464579403413
+33	626923679	-14642	29	40.61776386607777	1304429.5939037625
+34	626923679	-15059	28	34.0	8756.731536033676
+35	626923679	-16153	27	35.0	10351.008404963042
+36	626923679	-15912	20	36.0	9475.257975138164
+37	626923679	-12081	24	37.0	9017.860034890362
+38	626923679	-15248	29	38.0	9900.256257785535
+39	626923679	-14887	28	39.0	10513.343644635232
+40	626923679	-15861	22	40.0	9283.318678549174
+41	626923679	-13480	21	41.0	9016.291129937847
+42	626923679	-15834	28	42.0	10318.01399719996
+43	626923679	-15703	28	43.0	8757.796089055722
+44	626923679	-11185	16	44.0	9425.076634933797
+45	626923679	-15228	18	45.0	9459.968668643689
+46	626923679	-15187	22	46.0	9685.908173160062
+47	626923679	-16324	22	47.0	9822.220821743611
+48	626923679	-16372	29	48.0	10079.286173063345
+49	626923679	-15923	27	49.0	9850.111848934683
+50	626923679	-16236	21	50.0	9398.176197406601
+51	626923679	-15790	17	51.0	9220.075799194028
+52	626923679	-15450	20	52.0	9261.723648435052
+53	626923679	-16217	30	53.0	9895.247408969733
+54	626923679	-15245	16	54.0	9789.50878424882
+55	626923679	-15887	21	55.0	9826.38569192808
+56	626923679	-12631	21	56.0	8860.917133763547
+57	626923679	-15620	25	57.0	9413.99393840875
+58	626923679	-13627	20	58.0	9083.529665947459
+59	626923679	-16076	17	59.0	10117.44967077967
+60	626923679	-13606	23	60.0	8346.267436552042
+61	626923679	-15894	29	61.0	8785.714950987198
+62	626923679	-14307	17	62.0	9491.752726667326