diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index ab3cd43158..c564e33f4b 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -421,6 +421,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vectorized_context.q,\
   vectorized_date_funcs.q,\
   vectorized_dynamic_partition_pruning.q,\
+  vectorized_insert_into_bucketed_table.q,\
   vectorized_mapjoin.q,\
   vectorized_math_funcs.q,\
   vectorized_nested_mapjoin.q,\
@@ -707,6 +708,7 @@ minillaplocal.query.files=\
   vector_windowing_windowspec.q,\
   vector_windowing_windowspec4.q,\
   vectorization_input_format_excludes.q,\
+  vectorized_insert_into_bucketed_table.q,\
   vectorization_short_regress.q,\
   vectorized_dynamic_partition_pruning.q,\
   vectorized_dynamic_semijoin_reduction.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
index 072e09ef39..e99539246b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
@@ -296,9 +296,10 @@ public void process(Object row, int tag) throws HiveException {
           keyWritable.get()[keyLength] = reduceTagByte;
         }
         keyWritable.setDistKeyLength(keyLength);
-        keyWritable.setHashCode(hashCode);
       }
 
+      keyWritable.setHashCode(hashCode);
+
       if (!isEmptyValue) {
         valueLazyBinarySerializeWrite.reset();
         valueVectorSerializeRow.serializeWrite(batch, batchIndex);
diff --git a/ql/src/test/queries/clientpositive/vectorized_insert_into_bucketed_table.q b/ql/src/test/queries/clientpositive/vectorized_insert_into_bucketed_table.q
new file mode 100644
index 0000000000..8a71a7074b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorized_insert_into_bucketed_table.q
@@ -0,0 +1,6 @@
+set hive.vectorized.execution.enabled=true;
+create temporary table foo (x int) clustered by (x) into 4 buckets stored as orc;
+explain vectorization detail insert overwrite table foo values(1),(2),(3),(4),(9);
+insert overwrite table foo values(1),(2),(3),(4),(9);
+select *, regexp_extract(INPUT__FILE__NAME, '.*/(.*)', 1) from foo;
+drop table foo;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_insert_into_bucketed_table.q.out b/ql/src/test/results/clientpositive/llap/vectorized_insert_into_bucketed_table.q.out
new file mode 100644
index 0000000000..28a96e08ce
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorized_insert_into_bucketed_table.q.out
@@ -0,0 +1,158 @@
+PREHOOK: query: create temporary table foo (x int) clustered by (x) into 4 buckets stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create temporary table foo (x int) clustered by (x) into 4 buckets stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: explain vectorization detail insert overwrite table foo values(1),(2),(3),(4),(9)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail insert overwrite table foo values(1),(2),(3),(4),(9)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: values__tmp__table__1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:tmp_values_col1:string, 1:ROW__ID:struct]
+                  Select Operator
+                    expressions: tmp_values_col1 (type: string)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumnNums: []
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          partitionColumnNums: [2]
+                          valueColumnNums: [0]
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    includeColumns: [0]
+                    dataColumns: tmp_values_col1:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
+            Reduce Operator Tree:
+              Select Operator
+                expressions: UDFToInteger(VALUE._col0) (type: int)
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1]
+                    selectExpressions: CastStringToLong(col 0:string) -> 1:int
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.foo
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.foo
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+
+PREHOOK: query: insert overwrite table foo values(1),(2),(3),(4),(9)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert overwrite table foo values(1),(2),(3),(4),(9)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.x EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select *, regexp_extract(INPUT__FILE__NAME, '.*/(.*)', 1) from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select *, regexp_extract(INPUT__FILE__NAME, '.*/(.*)', 1) from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+4	000000_0
+9	000001_0
+1	000001_0
+2	000002_0
+3	000003_0
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo
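
A note on why the golden-file rows land where they do (not part of the patch; a standalone sketch under stated assumptions): the ReduceSink in this plan has no key columns (keyColumnNums: []), so before this fix keyWritable.setHashCode(hashCode) was only reached for non-empty keys, and the partition hash computed from column x was silently dropped, breaking bucket routing. Moving the call outside the !isEmptyKey branch preserves the hash. Assuming the legacy bucket computation (bucketing_version 1), where an int column's bucket hash is the value itself and the bucket number is (hash & Integer.MAX_VALUE) % numBuckets, the expected assignment can be reproduced with the sketch below. BucketAssignmentSketch and bucketFor are illustrative names, not Hive API; bucketFor mirrors what ObjectInspectorUtils.getBucketNumber computes.

public class BucketAssignmentSketch {

  // Assumption: mirrors Hive's legacy bucket computation
  // (non-negative hash modulo the bucket count).
  static int bucketFor(int hashCode, int numBuckets) {
    return (hashCode & Integer.MAX_VALUE) % numBuckets;
  }

  public static void main(String[] args) {
    int numBuckets = 4; // "clustered by (x) into 4 buckets"
    for (int x : new int[] {1, 2, 3, 4, 9}) {
      // Assumption: under bucketing_version 1, an int's bucket hash
      // is the value itself, so the bucket is simply x % 4 here.
      int bucket = bucketFor(x, numBuckets);
      System.out.println(x + " -> bucket " + bucket + " (file 00000" + bucket + "_0)");
    }
  }
}

Running this prints 1 -> 1, 2 -> 2, 3 -> 3, 4 -> 0, 9 -> 1, matching the q.out rows above: 4 in 000000_0, 1 and 9 in 000001_0, 2 in 000002_0, and 3 in 000003_0.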