From 4f7eea123ca217c1d13a861ce76cddb36f8118bd Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 8 Sep 2016 16:16:37 -0700 Subject: [PATCH] HIVE-14726 : delete statement fails when spdo is on --- .../ql/optimizer/SortedDynPartitionOptimizer.java | 7 +++ ql/src/test/queries/clientpositive/spdo_delete.q | 10 ++++ .../test/results/clientpositive/spdo_delete.q.out | 66 ++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/spdo_delete.q create mode 100644 ql/src/test/results/clientpositive/spdo_delete.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index febd446..c0e2888 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -143,6 +143,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (destTable == null) { LOG.debug("Bailing out of sort dynamic partition optimization as destination table is null"); return null; + } else { + if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE || + fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) { + // No advantage of running this optimization for update/delete + LOG.debug("Bailing out of sort dynamic partition optimization since update or delete is detected."); + return null; + } } // unlink connection between FS and its parent diff --git a/ql/src/test/queries/clientpositive/spdo_delete.q b/ql/src/test/queries/clientpositive/spdo_delete.q new file mode 100644 index 0000000..8b3f9c7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spdo_delete.q @@ -0,0 +1,10 @@ +set hive.optimize.sort.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE tbl_q4jukw9ucw (ca_street_number string, ca_city string, ca_state string) PARTITIONED BY (ca_location_type string) CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true'); +explain DELETE FROM tbl_q4jukw9ucw WHERE ca_city = 'San Jose'; +reset -d hive.optimize.sort.dynamic.partition; +reset -d hive.exec.dynamic.partition.mode; +reset -d hive.txn.manager; + diff --git a/ql/src/test/results/clientpositive/spdo_delete.q.out b/ql/src/test/results/clientpositive/spdo_delete.q.out new file mode 100644 index 0000000..053d74d --- /dev/null +++ b/ql/src/test/results/clientpositive/spdo_delete.q.out @@ -0,0 +1,66 @@ +PREHOOK: query: CREATE TABLE tbl_q4jukw9ucw (ca_street_number string, ca_city string, ca_state string) PARTITIONED BY (ca_location_type string) CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl_q4jukw9ucw +POSTHOOK: query: CREATE TABLE tbl_q4jukw9ucw (ca_street_number string, ca_city string, ca_state string) PARTITIONED BY (ca_location_type string) CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_q4jukw9ucw +PREHOOK: query: explain DELETE FROM tbl_q4jukw9ucw WHERE ca_city = 'San Jose' +PREHOOK: type: QUERY +POSTHOOK: query: explain DELETE FROM tbl_q4jukw9ucw WHERE ca_city = 'San Jose' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_q4jukw9ucw + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ca_city = 'San Jose') (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct), ca_location_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tbl_q4jukw9ucw + + Stage: Stage-0 + Move Operator + tables: + partition: + ca_location_type + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tbl_q4jukw9ucw + + Stage: Stage-2 + Stats-Aggr Operator + -- 1.7.12.4 (Apple Git-37)