diff --git ql/src/test/queries/clientpositive/delete_orig_table.q ql/src/test/queries/clientpositive/delete_orig_table.q index c009b76..b196a74 100644 --- ql/src/test/queries/clientpositive/delete_orig_table.q +++ ql/src/test/queries/clientpositive/delete_orig_table.q @@ -1,7 +1,5 @@ --! qt:dataset:alltypesorc --- Suppress vectorization due to known bug. See HIVE-19109. set hive.vectorized.execution.enabled=false; -set hive.test.vectorized.execution.enabled.override=disable; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; diff --git ql/src/test/queries/clientpositive/vector_delete_orig_table.q ql/src/test/queries/clientpositive/vector_delete_orig_table.q new file mode 100644 index 0000000..f914408 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_delete_orig_table.q @@ -0,0 +1,36 @@ +--! qt:dataset:alltypesorc +set hive.vectorized.execution.enabled=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table; +dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/00000_0; + +create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc location '${system:test.tmp.dir}/delete_orig_table' TBLPROPERTIES ('transactional'='true'); + +explain vectorization detail +select count(*) from acid_dot; + +select count(*) from acid_dot; + +delete from acid_dot where cint < -1070551679; + +select count(*) from acid_dot; + +dfs -rmr ${system:test.tmp.dir}/delete_orig_table; diff --git ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out new file mode 100644 index 0000000..d1d8cc0 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_dot +POSTHOOK: query: create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_dot +PREHOOK: query: explain vectorization detail +select count(*) from acid_dot +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select count(*) from acid_dot +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: acid_dot + Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_dot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dot +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from acid_dot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dot +POSTHOOK: Output: hdfs://### HDFS PATH ### +12288 +PREHOOK: query: delete from acid_dot where cint < -1070551679 +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dot +PREHOOK: Output: default@acid_dot +POSTHOOK: query: delete from acid_dot where cint < -1070551679 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dot +POSTHOOK: Output: default@acid_dot +PREHOOK: query: select count(*) from acid_dot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dot +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from acid_dot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dot +POSTHOOK: Output: hdfs://### HDFS PATH ### +12192 +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/vector_delete_orig_table.q.out ql/src/test/results/clientpositive/vector_delete_orig_table.q.out new file mode 100644 index 0000000..4ce897e --- /dev/null +++ ql/src/test/results/clientpositive/vector_delete_orig_table.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_dot +POSTHOOK: query: create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_dot +PREHOOK: query: explain vectorization detail +select count(*) from acid_dot +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select count(*) from acid_dot +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: acid_dot + Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from acid_dot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_dot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dot +#### A masked pattern was here #### +12288 +PREHOOK: query: delete from acid_dot where cint < -1070551679 +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dot +PREHOOK: Output: default@acid_dot +POSTHOOK: query: delete from acid_dot where cint < -1070551679 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dot +POSTHOOK: Output: default@acid_dot +PREHOOK: query: select count(*) from acid_dot +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_dot +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_dot +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_dot +#### A masked pattern was here #### +12192 +#### A masked pattern was here ####