commit 66ed8755ecf936e1ca131a06a590f8de4fd79ab9 Author: Daniel Dai Date: Sun Aug 2 21:32:53 2015 -0700 HIVE-11438: Joining an ACID table with a non-ACID table fails with MR on 1.0.0 diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3690e5c..18289f7 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -25,6 +25,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\ infer_bucket_sort_reducers_power_two.q,\ input16_cc.q,\ join1.q,\ + join_acid_non_acid.q,\ leftsemijoin_mr.q,\ list_bucket_dml_10.q,\ load_fs2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 913288f..b2db584 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -382,7 +382,15 @@ private static BaseWork getBaseWork(Configuration conf, String name) { in = new ByteArrayInputStream(planBytes); in = new InflaterInputStream(in); } else { - in = new FileInputStream(localPath.toUri().getPath()); + try { + in = new FileInputStream(localPath.toUri().getPath()); + } catch (FileNotFoundException fnf) { + } + // If it is on frontend, localPath does not exist, try + // to fetch it on hdfs + if (in == null) { + in = path.getFileSystem(conf).open(path); + } } if(MAP_PLAN_NAME.equals(name)){ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 200daa5..be0c947 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -132,7 +132,7 @@ @Override public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException { - return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf); + 
return (conf.getBoolean(AcidUtils.CONF_ACID_KEY, false)) || AcidUtils.isAcid(path, conf); } private static class OrcRecordReader diff --git a/ql/src/test/queries/clientpositive/join_acid_non_acid.q b/ql/src/test/queries/clientpositive/join_acid_non_acid.q new file mode 100644 index 0000000..43d768f --- /dev/null +++ b/ql/src/test/queries/clientpositive/join_acid_non_acid.q @@ -0,0 +1,24 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING) +CLUSTERED BY (k1) INTO 2 BUCKETS +STORED AS ORC TBLPROPERTIES("transactional"="true"); + +INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I'); + +CREATE TABLE orc_table (k1 INT, f1 STRING) +CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS +STORED AS ORC; + +INSERT OVERWRITE TABLE orc_table VALUES (1, 'x'); + +set hive.cbo.enable=true; +SET hive.execution.engine=mr; +SET hive.auto.convert.join=false; +SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +SET hive.conf.validation=false; +SET hive.doing.acid=false; + +SELECT t1.*, t2.* FROM orc_table t1 +JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1; diff --git a/ql/src/test/results/clientpositive/join_acid_non_acid.q.out b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out new file mode 100644 index 0000000..4905351 --- /dev/null +++ b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING) +CLUSTERED BY (k1) INTO 2 BUCKETS +STORED AS ORC TBLPROPERTIES("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_update_table +POSTHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING) +CLUSTERED BY (k1) INTO 2 BUCKETS +STORED AS ORC TBLPROPERTIES("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@orc_update_table +PREHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@orc_update_table +POSTHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@orc_update_table +POSTHOOK: Lineage: orc_update_table.f1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_update_table.k1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: orc_update_table.op_code SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING) +CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_table +POSTHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING) +CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_table +PREHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@orc_table +POSTHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@orc_table +POSTHOOK: Lineage: orc_table.f1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_table.k1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: 
SELECT t1.*, t2.* FROM orc_table t1 +JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_table +PREHOOK: Input: default@orc_update_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.*, t2.* FROM orc_table t1 +JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_table +POSTHOOK: Input: default@orc_update_table +#### A masked pattern was here #### +1 x 1 a I