From e2acd13a311f02886c30a3e66461cdc098c8ab0f Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 8 Sep 2016 16:16:37 -0700 Subject: [PATCH] HIVE-14726 : delete statement fails when spdo is on --- .../ql/optimizer/SortedDynPartitionOptimizer.java | 7 +- ql/src/test/queries/clientpositive/spdo_delete.q | 10 +++ .../test/results/clientpositive/spdo_delete.q.out | 87 ++++++++++++++++++++++ 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/spdo_delete.q create mode 100644 ql/src/test/results/clientpositive/spdo_delete.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index febd446..cc402af 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -184,6 +184,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, destTable.getCols()); List sortPositions = null; List sortOrder = null; + ArrayList bucketColumns; if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE || fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) { // When doing updates and deletes we always want to sort on the rowid because the ACID @@ -191,6 +192,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // ignore whatever comes from the table and enforce this sort order instead. sortPositions = Arrays.asList(0); sortOrder = Arrays.asList(1); // 1 means asc, could really use enum here in the thrift if + bucketColumns = new ArrayList<>(); // create place holder for bucket column used for sorting + bucketColumns.add(new ExprNodeColumnDesc()); // this will be replaced by _bucket_number_ } else { if (!destTable.getSortCols().isEmpty()) { // Sort columns specified by table @@ -202,6 +205,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, sortOrder = Lists.newArrayList(); inferSortPositions(fsParent, sortPositions, sortOrder); } + List colInfos = fsParent.getSchema().getSignature(); + bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos); } List sortNullOrder = new ArrayList(); for (int order : sortOrder) { @@ -212,8 +217,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (int i : sortOrder) LOG.debug("sort order " + i); for (int i : sortNullOrder) LOG.debug("sort null order " + i); List partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema()); - List colInfos = fsParent.getSchema().getSignature(); - ArrayList bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos); // update file sink descriptor fsOp.getConf().setMultiFileSpray(false); diff --git a/ql/src/test/queries/clientpositive/spdo_delete.q b/ql/src/test/queries/clientpositive/spdo_delete.q new file mode 100644 index 0000000..8b3f9c7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spdo_delete.q @@ -0,0 +1,10 @@ +set hive.optimize.sort.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE tbl_q4jukw9ucw (ca_street_number string, ca_city string, ca_state string) PARTITIONED BY (ca_location_type string) CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true'); +explain DELETE FROM tbl_q4jukw9ucw WHERE ca_city = 'San Jose'; +reset -d hive.optimize.sort.dynamic.partition; +reset -d hive.exec.dynamic.partition.mode; +reset -d hive.txn.manager; + diff --git a/ql/src/test/results/clientpositive/spdo_delete.q.out b/ql/src/test/results/clientpositive/spdo_delete.q.out new file mode 100644 index 0000000..16b7f8f --- /dev/null +++ b/ql/src/test/results/clientpositive/spdo_delete.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: CREATE TABLE tbl_q4jukw9ucw (ca_street_number string, ca_city string, ca_state string) PARTITIONED BY (ca_location_type string) CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl_q4jukw9ucw +POSTHOOK: query: CREATE TABLE tbl_q4jukw9ucw (ca_street_number string, ca_city string, ca_state string) PARTITIONED BY (ca_location_type string) CLUSTERED BY (ca_state) INTO 50 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_q4jukw9ucw +PREHOOK: query: explain DELETE FROM tbl_q4jukw9ucw WHERE ca_city = 'San Jose' +PREHOOK: type: QUERY +POSTHOOK: query: explain DELETE FROM tbl_q4jukw9ucw WHERE ca_city = 'San Jose' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_q4jukw9ucw + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ca_city = 'San Jose') (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ROW__ID (type: struct), ca_location_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), '_bucket_number' (type: string), _col0 (type: struct) + sort order: +++ + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY._col0 (type: struct), KEY._col1 (type: string), KEY.'_bucket_number' (type: string) + outputColumnNames: _col0, _col1, '_bucket_number' + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tbl_q4jukw9ucw + + Stage: Stage-0 + Move Operator + tables: + partition: + ca_location_type + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.tbl_q4jukw9ucw + + Stage: Stage-3 + Stats-Aggr Operator + -- 1.7.12.4 (Apple Git-37)