diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index d08528f319..870a9b68e4 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -1043,6 +1043,7 @@ spark.query.files=add_part_multiple.q, \ bucket2.q, \ bucket3.q, \ bucket4.q, \ + bucket7.q, \ bucket_map_join_1.q, \ bucket_map_join_2.q, \ bucket_map_join_spark1.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index 5780bd4414..744cdf5d28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -151,6 +151,10 @@ private boolean checkSetDone(Object row, int tag) { @Override public void cleanUpInputFileChangedOp() throws HiveException { inputFileChanged = true; + updateFileId(); + } + + private void updateFileId() { // If the file name to bucket number mapping is maintained, store the bucket number // in the execution context. This is needed for the following scenario: // insert overwrite table T1 select * from T2; @@ -282,6 +286,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { @Override public void closeOp(boolean abort) throws HiveException { + if (getExecContext() != null && getExecContext().getFileId() == null) { + updateFileId(); + } if (conf != null) { if (conf.isGatherStats() && stats.size() != 0) { publishStats(); diff --git ql/src/test/queries/clientpositive/bucket7.q ql/src/test/queries/clientpositive/bucket7.q new file mode 100644 index 0000000000..d4f9bca474 --- /dev/null +++ ql/src/test/queries/clientpositive/bucket7.q @@ -0,0 +1,12 @@ +--! qt:dataset: +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.optimize.bucketingsorting=true; + +create table bucket1 (id int, val string) clustered by (id) sorted by (id ASC) INTO 4 BUCKETS; +insert into bucket1 values (1, 'abc'), (3, 'abc'); +select * from bucket1; + +create table bucket2 like bucket1; +insert overwrite table bucket2 select * from bucket1; +select * from bucket2; diff --git ql/src/test/results/clientpositive/bucket7.q.out ql/src/test/results/clientpositive/bucket7.q.out new file mode 100644 index 0000000000..5b68c59c31 --- /dev/null +++ ql/src/test/results/clientpositive/bucket7.q.out @@ -0,0 +1,56 @@ +PREHOOK: query: create table bucket1 (id int, val string) clustered by (id) sorted by (id ASC) INTO 4 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket1 +POSTHOOK: query: create table bucket1 (id int, val string) clustered by (id) sorted by (id ASC) INTO 4 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket1 +PREHOOK: query: insert into bucket1 values (1, 'abc'), (3, 'abc') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@bucket1 +POSTHOOK: query: insert into bucket1 values (1, 'abc'), (3, 'abc') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@bucket1 +POSTHOOK: Lineage: bucket1.id SCRIPT [] +POSTHOOK: Lineage: bucket1.val SCRIPT [] +PREHOOK: query: select * from bucket1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1 +#### A masked pattern was here #### +1 abc +3 abc +PREHOOK: query: create table bucket2 like bucket1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket2 +POSTHOOK: query: create table bucket2 like bucket1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket2 +PREHOOK: query: insert overwrite table bucket2 select * from bucket1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1 +PREHOOK: Output: default@bucket2 +POSTHOOK: query: insert overwrite table bucket2 select * from bucket1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1 +POSTHOOK: Output: default@bucket2 +POSTHOOK: Lineage: bucket2.id SIMPLE [(bucket1)bucket1.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: bucket2.val SIMPLE [(bucket1)bucket1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: select * from bucket2 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2 +#### A masked pattern was here #### +1 abc +3 abc diff --git ql/src/test/results/clientpositive/spark/bucket7.q.out ql/src/test/results/clientpositive/spark/bucket7.q.out new file mode 100644 index 0000000000..5b68c59c31 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucket7.q.out @@ -0,0 +1,56 @@ +PREHOOK: query: create table bucket1 (id int, val string) clustered by (id) sorted by (id ASC) INTO 4 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket1 +POSTHOOK: query: create table bucket1 (id int, val string) clustered by (id) sorted by (id ASC) INTO 4 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket1 +PREHOOK: query: insert into bucket1 values (1, 'abc'), (3, 'abc') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@bucket1 +POSTHOOK: query: insert into bucket1 values (1, 'abc'), (3, 'abc') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@bucket1 +POSTHOOK: Lineage: bucket1.id SCRIPT [] +POSTHOOK: Lineage: bucket1.val SCRIPT [] +PREHOOK: query: select * from bucket1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1 +#### A masked pattern was here #### +1 abc +3 abc +PREHOOK: query: create table bucket2 like bucket1 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket2 +POSTHOOK: query: create table bucket2 like bucket1 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket2 +PREHOOK: query: insert overwrite table bucket2 select * from bucket1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1 +PREHOOK: Output: default@bucket2 +POSTHOOK: query: insert overwrite table bucket2 select * from bucket1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1 +POSTHOOK: Output: default@bucket2 +POSTHOOK: Lineage: bucket2.id SIMPLE [(bucket1)bucket1.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: bucket2.val SIMPLE [(bucket1)bucket1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: select * from bucket2 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2 +#### A masked pattern was here #### +1 abc +3 abc