POSTHOOK: query: explain vectorization detail insert overwrite table ctas_part partition (modkey) select key, value, ceil(key / 100) from src where key is not null order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@ctas_part PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: key (type: string) null sort order: z Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE top n: 10 Top N Key Vectorization: className: VectorTopNKeyOperator keyExpressions: col 0:string native: true Select Operator expressions: key (type: string), value (type: string), ceil((UDFToDouble(key) / 100.0D)) (type: bigint) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 5] selectExpressions: FuncCeilDoubleToLong(col 4:double)(children: DoubleColDivideDoubleScalar(col 3:double, val 100.0)(children: CastStringToDouble(col 0:string) -> 3:double) -> 4:double) -> 5:bigint Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:string, 5:bigint Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [double, double, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true reduceColumnNullOrder: z reduceColumnSortOrder: + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:bigint partitionColumnCount: 0 scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [3, 1, 2] selectExpressions: CastStringToLong(col 0:string) -> 3:int Statistics: Num rows: 10 Data size: 1030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: bigint), _col0 (type: string) null sort order: az sort order: ++ Map-reduce partition columns: _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator keyColumns: 2:bigint, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 2:bigint valueColumns: 3:int, 1:string Statistics: Num rows: 10 Data size: 1030 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true reduceColumnNullOrder: az reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 4 dataColumns: KEY._col2:bigint, KEY._col0:string, VALUE._col0:int, VALUE._col1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 0] File Output Operator compressed: false Dp Sort State: PARTITION_SORTED File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 10 Data size: 1030 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ctas_part Stage: Stage-2 Dependency Collection Stage: Stage-0 Move Operator tables: partition: modkey replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ctas_part