diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 8c948a98cc98d7997c86310d0fe32c61f72cc744..a84123d08391ff73ab6ea00877ea3fae6848b772 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1400,7 +1400,7 @@ public Partition loadPartition(Path loadPath, Table tbl,
     } else {
       newFiles = new ArrayList<>();
       FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
-      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
+      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles, tbl);
     }

     boolean forceCreate = (!holdDDLTime) ? true : false;
@@ -1642,7 +1642,7 @@ public void loadTable(Path loadPath, String tableName, boolean replace,
     FileSystem fs;
     try {
       fs = tbl.getDataLocation().getFileSystem(sessionConf);
-      copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles);
+      copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles, tbl);
     } catch (IOException e) {
       throw new HiveException("addFiles: filesystem error in check phase", e);
     }
@@ -2665,7 +2665,7 @@ public static boolean moveFile(HiveConf conf, Path srcf, Path destf,
    * @throws HiveException
    */
   static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
-      FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles) throws HiveException {
+      FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles, Table tbl) throws HiveException {
     boolean inheritPerms = HiveConf.getBoolVar(conf,
         HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
     try {
@@ -2701,10 +2701,18 @@ static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
     } else {
       // check that source and target paths exist
       List<List<Path[]>> result = checkPaths(conf, fs, srcs, srcFs, destf, false);
+      boolean isEnforceBucketing = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEENFORCEBUCKETING)
+          && tbl.getNumBuckets() > 0;
       // move it, move it
       try {
         for (List<Path[]> sdpairs : result) {
           for (Path[] sdpair : sdpairs) {
+            if (isEnforceBucketing && sdpair[0] != null && sdpair[1] != null && !sdpair[0].getName().equalsIgnoreCase(sdpair[1].getName())) {
+              LOG.error(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
+                  getMsg("Table: " + tbl.getTableName()));
+              throw new HiveException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
+                  getMsg("Table: " + tbl.getTableName()));
+            }
             if (!moveFile(conf, sdpair[0], sdpair[1], fs, false, isSrcLocal)) {
               throw new IOException("Cannot move " + sdpair[0] + " to "
                   + sdpair[1]);
diff --git a/ql/src/test/queries/clientnegative/insertinto_nonemptybucket.q b/ql/src/test/queries/clientnegative/insertinto_nonemptybucket.q
new file mode 100644
index 0000000000000000000000000000000000000000..f6364f1b133df7bb2652057f36237a2c0623feb7
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/insertinto_nonemptybucket.q
@@ -0,0 +1,8 @@
+drop table if exists buckettest1;
+create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets;
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.dynamic.partition = true;
+set hive.exec.dynamic.partition.mode = nonstrict;
+insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100;
+insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200;
diff --git a/ql/src/test/results/clientnegative/insertinto_nonemptybucket.q.out b/ql/src/test/results/clientnegative/insertinto_nonemptybucket.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..a0b605583b170b36659f5f5cea4ae33cbb30da15
--- /dev/null
+++ b/ql/src/test/results/clientnegative/insertinto_nonemptybucket.q.out
@@ -0,0 +1,27 @@
+PREHOOK: query: drop table if exists buckettest1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists buckettest1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@buckettest1
+PREHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@buckettest1@state=MA
+POSTHOOK: Lineage: buckettest1 PARTITION(state=MA).data EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@buckettest1
+Failed with exception Bucketized tables do not support INSERT INTO: Table: buckettest1
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask