diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index c44315f3d9..f44beed5fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -70,7 +70,6 @@ public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context, // All but decimal. SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BOOLEAN); SUPPORTED_PRIMITIVES.add(PrimitiveCategory.VOID); - SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BOOLEAN); SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BYTE); SUPPORTED_PRIMITIVES.add(PrimitiveCategory.SHORT); SUPPORTED_PRIMITIVES.add(PrimitiveCategory.INT); @@ -85,6 +84,13 @@ public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context, SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BINARY); SUPPORTED_PRIMITIVES.add(PrimitiveCategory.VARCHAR); SUPPORTED_PRIMITIVES.add(PrimitiveCategory.CHAR); + /** + * No matter what scale/precision the join keys have, they end up cast to a common type in the query plan. + * We should be ok comparing them byte by byte. 
See + * {@link org.apache.hadoop.hive.ql.exec.FunctionRegistry#getCommonClassForComparison(TypeInfo, TypeInfo)} + * Q test: mapjoin_decimal_vectorized.q + */ + SUPPORTED_PRIMITIVES.add(PrimitiveCategory.DECIMAL); } public static boolean isSupportedField(ObjectInspector foi) { diff --git ql/src/test/queries/clientpositive/mapjoin_decimal_vectorized.q ql/src/test/queries/clientpositive/mapjoin_decimal_vectorized.q new file mode 100644 index 0000000000..6bab1059c7 --- /dev/null +++ ql/src/test/queries/clientpositive/mapjoin_decimal_vectorized.q @@ -0,0 +1,56 @@ +SET hive.vectorized.execution.enabled=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000000; + +-- SORT_QUERY_RESULTS + +CREATE TABLE over1k_n5(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n5; + +CREATE TABLE t1_n95(`dec` decimal(4,2)) STORED AS ORC; +INSERT INTO TABLE t1_n95 select `dec` from over1k_n5; +CREATE TABLE t2_n59(`dec` decimal(4,0)) STORED AS ORC; +INSERT INTO TABLE t2_n59 select `dec` from over1k_n5; + + +set hive.mapjoin.optimized.hashtable=false; + +explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; + +select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; + +set hive.mapjoin.optimized.hashtable=true; + +select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; + +explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by 
t1_n95.`dec`; + +set hive.cbo.enable=false; + +set hive.mapjoin.optimized.hashtable=false; + +explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; + +select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; + +set hive.mapjoin.optimized.hashtable=true; + +select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; + +explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec`; \ No newline at end of file diff --git ql/src/test/results/clientpositive/convert_decimal64_to_decimal.q.out ql/src/test/results/clientpositive/convert_decimal64_to_decimal.q.out index ad15414ac1..215f8a83d2 100644 --- ql/src/test/results/clientpositive/convert_decimal64_to_decimal.q.out +++ ql/src/test/results/clientpositive/convert_decimal64_to_decimal.q.out @@ -209,9 +209,8 @@ STAGE PLANS: bigTableValueExpressions: col 14:smallint className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 958 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -369,9 +368,8 @@ STAGE 
PLANS: bigTableValueExpressions: col 14:smallint className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col1 Statistics: Num rows: 958 Data size: 3480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out index 2570aed11b..b385554f13 100644 --- ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out +++ ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out @@ -187,12 +187,16 @@ STAGE PLANS: 0 _col0 (type: decimal(9,2)) 1 _col1 (type: decimal(9,2)) Map Join Vectorization: + bigTableKeyColumns: 20:decimal(9,2) bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 3:decimal(9,2)/DECIMAL_64) -> 20:decimal(9,2) - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableRetainColumnNums: [] + className: 
VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 21:smallint + smallTableValueMapping: 21:smallint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -200,7 +204,7 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:smallint) -> bigint + aggregators: VectorUDAFCount(col 21:smallint) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -403,12 +407,15 @@ STAGE PLANS: 0 _col0 (type: decimal(9,2)) 1 _col1 (type: decimal(9,2)) Map Join Vectorization: - bigTableKeyExpressions: col 3:decimal(9,2) - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableKeyColumns: 3:decimal(9,2) + bigTableRetainColumnNums: [] + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 20:smallint + smallTableValueMapping: 20:smallint + hashTableImplementationType: OPTIMIZED 
outputColumnNames: _col1 input vertices: 1 Map 3 @@ -416,7 +423,7 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:smallint) -> bigint + aggregators: VectorUDAFCount(col 20:smallint) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false diff --git ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out new file mode 100644 index 0000000000..1762da4741 --- /dev/null +++ ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out @@ -0,0 +1,686 @@ +PREHOOK: query: CREATE TABLE over1k_n5(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_n5 +POSTHOOK: query: CREATE TABLE over1k_n5(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_n5 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k_n5 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k_n5 +PREHOOK: query: CREATE TABLE t1_n95(`dec` decimal(4,2)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n95 +POSTHOOK: query: CREATE TABLE t1_n95(`dec` decimal(4,2)) STORED AS ORC +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n95 +PREHOOK: query: INSERT INTO TABLE t1_n95 select `dec` from over1k_n5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n5 +PREHOOK: Output: default@t1_n95 +POSTHOOK: query: INSERT INTO TABLE t1_n95 select `dec` from over1k_n5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n5 +POSTHOOK: Output: default@t1_n95 +POSTHOOK: Lineage: t1_n95.dec SIMPLE [(over1k_n5)over1k_n5.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +PREHOOK: query: CREATE TABLE t2_n59(`dec` decimal(4,0)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2_n59 +POSTHOOK: query: CREATE TABLE t2_n59(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2_n59 +PREHOOK: query: INSERT INTO TABLE t2_n59 select `dec` from over1k_n5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n5 +PREHOOK: Output: default@t2_n59 +POSTHOOK: query: INSERT INTO TABLE t2_n59 select `dec` from over1k_n5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n5 +POSTHOOK: Output: default@t2_n59 +POSTHOOK: Lineage: t2_n59.dec EXPRESSION [(over1k_n5)over1k_n5.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +PREHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: 
+ Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_n95 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,2)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(6,2)) + 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,2)/DECIMAL_64) -> 2:decimal(6,2) + bigTableValueExpressions: col 0:decimal(6,2)/DECIMAL_64 + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.mapjoin.optimized.hashtable IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:decimal(4,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(4,0) + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(4,0)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,2)] + Map 3 + Map Operator Tree: + TableScan + alias: t2_n59 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,0)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,0)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,0)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(6,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,0)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 
+89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: explain 
vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_n95 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,2)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(6,2)) + 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + bigTableKeyColumns: 2:decimal(6,2) + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,2)/DECIMAL_64) -> 2:decimal(6,2) + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(6,2) + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + 
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [2] + projectedOutput: 0:decimal(6,2), 2:decimal(6,2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:decimal(4,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:decimal(4,0) + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(4,0)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,2)] + Map 3 + Map Operator Tree: + TableScan + alias: t2_n59 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + 
native: true + vectorizationSchemaColumns: [0:dec:decimal(4,0)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,0)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,0)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(6,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,0)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index f07f2b08f4..0c21172022 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -133,13 +133,15 @@ STAGE PLANS: 0 _col0 (type: decimal(26,2)) 1 _col0 (type: decimal(26,2)) Map Join Vectorization: - bigTableKeyExpressions: col 0:decimal(26,2) - bigTableValueExpressions: col 0:decimal(26,2) - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition 
IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableKeyColumns: 0:decimal(26,2) + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(26,2) + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(26,2), 0:decimal(26,2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 2 @@ -415,13 +417,16 @@ STAGE PLANS: 0 _col0 (type: decimal(26,2)) 1 _col0 (type: decimal(26,2)) Map Join Vectorization: - bigTableKeyExpressions: col 0:decimal(26,2) - bigTableValueExpressions: col 0:decimal(26,2), col 1:decimal(22,2) - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableKeyColumns: 0:decimal(26,2) + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:decimal(26,2), 1:decimal(22,2) + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + 
nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(26,2), 1:decimal(22,2), 0:decimal(26,2), 3:decimal(24,0) + smallTableValueMapping: 3:decimal(24,0) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -775,13 +780,16 @@ STAGE PLANS: 0 _col0 (type: decimal(16,2)) 1 _col0 (type: decimal(16,2)) Map Join Vectorization: + bigTableKeyColumns: 3:decimal(16,2) bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 3:decimal(16,2) - bigTableValueExpressions: col 0:decimal(16,2)/DECIMAL_64 - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(16,2) + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [3] + projectedOutput: 0:decimal(16,2), 3:decimal(16,2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 2 @@ -1057,13 +1065,17 @@ STAGE PLANS: 0 _col0 (type: decimal(16,2)) 1 _col0 (type: decimal(16,2)) Map Join Vectorization: + bigTableKeyColumns: 3:decimal(16,2) bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 3:decimal(16,2) - bigTableValueExpressions: col 0:decimal(16,2)/DECIMAL_64, col 1:decimal(14,2)/DECIMAL_64 - 
className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:decimal(16,2), 1:decimal(14,2) + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [3] + projectedOutput: 0:decimal(16,2), 1:decimal(14,2), 3:decimal(16,2), 4:decimal(14,0) + smallTableValueMapping: 4:decimal(14,0) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1341,13 +1353,15 @@ STAGE PLANS: 0 _col0 (type: decimal(16,2)) 1 _col0 (type: decimal(16,2)) Map Join Vectorization: - bigTableKeyExpressions: col 0:decimal(16,2) - bigTableValueExpressions: col 0:decimal(16,2) - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableKeyColumns: 0:decimal(16,2) + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(16,2) + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(16,2), 0:decimal(16,2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 2 @@ -1625,13 +1639,16 @@ STAGE PLANS: 0 _col0 (type: decimal(16,2)) 1 _col0 (type: decimal(16,2)) Map Join Vectorization: - bigTableKeyExpressions: col 0:decimal(16,2) - bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2) - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + bigTableKeyColumns: 0:decimal(16,2) + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:decimal(16,2), 1:decimal(14,2) + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(16,2), 1:decimal(14,2), 0:decimal(16,2), 3:decimal(14,0) + smallTableValueMapping: 3:decimal(14,0) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/mapjoin_decimal_vectorized.q.out 
ql/src/test/results/clientpositive/mapjoin_decimal_vectorized.q.out new file mode 100644 index 0000000000..6d077c9535 --- /dev/null +++ ql/src/test/results/clientpositive/mapjoin_decimal_vectorized.q.out @@ -0,0 +1,1070 @@ +PREHOOK: query: CREATE TABLE over1k_n5(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_n5 +POSTHOOK: query: CREATE TABLE over1k_n5(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_n5 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k_n5 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k_n5 +PREHOOK: query: CREATE TABLE t1_n95(`dec` decimal(4,2)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1_n95 +POSTHOOK: query: CREATE TABLE t1_n95(`dec` decimal(4,2)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1_n95 +PREHOOK: query: INSERT INTO TABLE t1_n95 select `dec` from over1k_n5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n5 +PREHOOK: Output: default@t1_n95 +POSTHOOK: query: INSERT INTO TABLE t1_n95 select `dec` from over1k_n5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n5 +POSTHOOK: Output: default@t1_n95 +POSTHOOK: 
Lineage: t1_n95.dec SIMPLE [(over1k_n5)over1k_n5.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +PREHOOK: query: CREATE TABLE t2_n59(`dec` decimal(4,0)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2_n59 +POSTHOOK: query: CREATE TABLE t2_n59(`dec` decimal(4,0)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2_n59 +PREHOOK: query: INSERT INTO TABLE t2_n59 select `dec` from over1k_n5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n5 +PREHOOK: Output: default@t2_n59 +POSTHOOK: query: INSERT INTO TABLE t2_n59 select `dec` from over1k_n5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n5 +POSTHOOK: Output: default@t2_n59 +POSTHOOK: Lineage: t2_n59.dec EXPRESSION [(over1k_n5)over1k_n5.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +PREHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:t2_n59 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:t2_n59 + TableScan + alias: t2_n59 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic 
stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,0)) + outputColumnNames: _col0 + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(6,2)) + 1 _col0 (type: decimal(6,2)) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n95 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,2)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(6,2)) + 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,2)/DECIMAL_64) -> 2:decimal(6,2) + bigTableValueExpressions: col 0:decimal(6,2)/DECIMAL_64 + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: 
hive.mapjoin.optimized.hashtable IS false, hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(4,0)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,2)] + Local Work: + Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A 
masked pattern was here #### +POSTHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-5 is a 
root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:t2_n59 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:t2_n59 + TableScan + alias: t2_n59 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,0)) + outputColumnNames: _col0 + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: decimal(6,2)) + 1 _col0 (type: decimal(6,2)) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n95 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,2)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(6,2)) + 1 _col0 (type: decimal(6,2)) + Map Join Vectorization: + bigTableKeyExpressions: 
ConvertDecimal64ToDecimal(col 0:decimal(6,2)/DECIMAL_64) -> 2:decimal(6,2) + bigTableValueExpressions: col 0:decimal(6,2)/DECIMAL_64 + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(4,0)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,2)] + Local Work: + Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + t2_n59 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + t2_n59 + TableScan + alias: t2_n59 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 dec 
(type: decimal(6,2)) + 1 dec (type: decimal(6,2)) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n95 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,2)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(6,2)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 dec (type: decimal(6,2)) + 1 dec (type: decimal(6,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,2)/DECIMAL_64) -> 2:decimal(6,2) + bigTableValueExpressions: col 0:decimal(4,2)/DECIMAL_64 + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.mapjoin.optimized.hashtable IS false, hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col4 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: 
false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(4,0)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,2), decimal(4,0)] + Local Work: + Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by 
t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +14.00 14 +17.00 
17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +17.00 17 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +45.00 45 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +6.00 6 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +62.00 62 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +64.00 64 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +70.00 70 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +79.00 79 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +89.00 89 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +9.00 9 +PREHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n95 +PREHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select t1_n95.`dec`, t2_n59.`dec` from t1_n95 join t2_n59 on (t1_n95.`dec`=t2_n59.`dec`) order by t1_n95.`dec` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n95 +POSTHOOK: Input: default@t2_n59 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + t2_n59 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + t2_n59 + TableScan + alias: t2_n59 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic 
stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 dec (type: decimal(6,2)) + 1 dec (type: decimal(6,2)) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: t1_n95 + filterExpr: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dec:decimal(4,2)/DECIMAL_64, 1:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:decimal(6,2)/DECIMAL_64) + predicate: dec is not null (type: boolean) + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 dec (type: decimal(6,2)) + 1 dec (type: decimal(6,2)) + Map Join Vectorization: + bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,2)/DECIMAL_64) -> 2:decimal(6,2) + bigTableValueExpressions: col 0:decimal(4,2)/DECIMAL_64 + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col4 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num 
rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(4,0)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: dec:decimal(4,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,2), decimal(4,0)] + Local Work: + Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1112 Data size: 249088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out index 1ced256d38..d26ca931ec 100644 --- ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out @@ -156,9 +156,8 @@ STAGE PLANS: bigTableValueExpressions: col 0:decimal(26,2) className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -405,9 +404,8 @@ STAGE PLANS: bigTableValueExpressions: col 0:decimal(26,2), col 1:decimal(22,2) className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -730,9 +728,8 @@ STAGE PLANS: bigTableValueExpressions: col 0:decimal(16,2)/DECIMAL_64 className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -979,9 +976,8 @@ STAGE PLANS: bigTableValueExpressions: col 0:decimal(16,2)/DECIMAL_64, col 1:decimal(14,2)/DECIMAL_64 className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1228,9 +1224,8 @@ STAGE PLANS: bigTableValueExpressions: col 0:decimal(16,2) className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1 Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1478,9 +1473,8 @@ STAGE PLANS: bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2) className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Optimized Table and Supports Key Types IS false - nativeNotSupportedKeyTypes: DECIMAL + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1096 Data size: 233717 Basic stats: COMPLETE Column stats: NONE File Output Operator