diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 8c948a98cc98d7997c86310d0fe32c61f72cc744..b6f527e241d914b209f20d21f3e0819abd9d5885 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1400,7 +1400,7 @@ public Partition loadPartition(Path loadPath, Table tbl,
     } else {
       newFiles = new ArrayList<Path>();
       FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
-      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
+      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles, tbl);
     }
 
     boolean forceCreate = (!holdDDLTime) ? true : false;
@@ -1642,7 +1642,7 @@ public void loadTable(Path loadPath, String tableName, boolean replace,
     FileSystem fs;
     try {
       fs = tbl.getDataLocation().getFileSystem(sessionConf);
-      copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles);
+      copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles, tbl);
     } catch (IOException e) {
       throw new HiveException("addFiles: filesystem error in check phase", e);
     }
@@ -2665,7 +2665,7 @@ public static boolean moveFile(HiveConf conf, Path srcf, Path destf,
    * @throws HiveException
    */
   static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
-      FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles) throws HiveException {
+      FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles, Table tbl) throws HiveException {
     boolean inheritPerms = HiveConf.getBoolVar(conf,
         HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
     try {
@@ -2701,10 +2701,19 @@ static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
     } else {
       // check that source and target paths exist
       List<List<Path[]>> result = checkPaths(conf, fs, srcs, srcFs, destf, false);
+      boolean isEnforceBucketing = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEENFORCEBUCKETING)
+          && tbl.getNumBuckets() > 0;
       // move it, move it
+      int warnCount = 0; // emit the warning at most once per copyFiles() call
       try {
         for (List<Path[]> sdpairs : result) {
           for (Path[] sdpair : sdpairs) {
+            if (isEnforceBucketing && warnCount == 0 && sdpair[0] != null && sdpair[1] != null && !sdpair[0].getName().equalsIgnoreCase(sdpair[1].getName())) {
+              String msg = "Insert into a bucketed table is not supported; it may result in incorrect metadata.\nTable name: " + tbl.getTableName();
+              SessionState.getConsole().printInfo(
+                  String.format("Warning: %s", msg));
+              ++warnCount;
+            }
             if (!moveFile(conf, sdpair[0], sdpair[1], fs, false, isSrcLocal)) {
               throw new IOException("Cannot move " + sdpair[0] + " to "
                   + sdpair[1]);
diff --git a/ql/src/test/queries/clientpositive/insertinto_nonemptybucket.q b/ql/src/test/queries/clientpositive/insertinto_nonemptybucket.q
new file mode 100644
index 0000000000000000000000000000000000000000..f6364f1b133df7bb2652057f36237a2c0623feb7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insertinto_nonemptybucket.q
@@ -0,0 +1,8 @@
+drop table if exists buckettest1;
+create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets;
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting=true;
+set hive.exec.dynamic.partition = true;
+set hive.exec.dynamic.partition.mode = nonstrict;
+insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100;
+insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200;
diff --git a/ql/src/test/results/clientpositive/insertinto_nonemptybucket.q.out b/ql/src/test/results/clientpositive/insertinto_nonemptybucket.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..0d9b13c5b9bebc447d7c91174954bb97fcdd64f9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insertinto_nonemptybucket.q.out
@@ -0,0 +1,30 @@
+PREHOOK: query: drop table if exists buckettest1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists buckettest1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@buckettest1
+PREHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@buckettest1@state=MA
+POSTHOOK: Lineage: buckettest1 PARTITION(state=MA).data EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@buckettest1@state=MA
+POSTHOOK: Lineage: buckettest1 PARTITION(state=MA).data EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]