diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3ec63ee..9071d1b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -39,6 +39,8 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.io.HdfsUtils; +import org.apache.hadoop.hive.io.HdfsUtils.HadoopFileStatus; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -865,6 +867,26 @@ private FSPaths createNewPaths(String dirName) throws HiveException { createBucketFiles(fsp2); valToPaths.put(dirName, fsp2); } + + Path destf = fsp2.tmpPath; + final FileSystem destFs; + try { + destFs = destf.getFileSystem(hconf); + } catch (IOException e) { + LOG.error("Failed to get dest fs", e); + throw new HiveException(e.getMessage(), e); + } + final boolean inheritPerms = HiveConf.getBoolVar(hconf, + HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS); + try { + if (inheritPerms) { + HadoopFileStatus fullDestStatus = new HadoopFileStatus(hconf, destFs, destf.getParent()); + HdfsUtils.setFullFileStatus(hconf, fullDestStatus, destFs, destf, true); + } + } catch (IOException e) { + throw new HiveException(e); + } + return fsp2; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index dcfc2b5..9204313 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2694,9 +2694,7 @@ private static void copyFiles(final HiveConf conf, final FileSystem destFs, destPath = 
mvFile(conf, srcP, destPath, isSrcLocal, srcFs, destFs, name, filetype); } - if (inheritPerms) { - HdfsUtils.setFullFileStatus(conf, fullDestStatus, destFs, destPath, false); - } + // The permission is set in FileSinkOperator if (null != newFiles) { newFiles.add(destPath); } @@ -2870,13 +2868,9 @@ public Void call() throws Exception { SessionState.setCurrentSessionState(parentSession); Path destPath = new Path(destf, status.getPath().getName()); try { - if(destFs.rename(status.getPath(), destf)) { - if (inheritPerms) { - HdfsUtils.setFullFileStatus(conf, desiredStatus, destFs, destPath, false); - } - } else { - throw new IOException("rename for src path: " + status.getPath() + " to dest path:" - + destPath + " returned false"); + if (!destFs.rename(status.getPath(), destf)) { + throw new IOException("rename for src path: " + status.getPath() + + " to dest path:" + destPath + " returned false"); } } catch (IOException ioe) { LOG.error("Failed to rename/set permissions. Src path: {} Dest path: {}", status.getPath(), destPath); diff --git a/ql/src/test/queries/clientpositive/set_file_permission_in_fs.q b/ql/src/test/queries/clientpositive/set_file_permission_in_fs.q new file mode 100644 index 0000000..e15a406 --- /dev/null +++ b/ql/src/test/queries/clientpositive/set_file_permission_in_fs.q @@ -0,0 +1,18 @@ +set hive.mapred.mode=nonstrict; +CREATE TABLE t1 (c1 BIGINT, c2 STRING); + +CREATE TABLE t2 (c1 INT, c2 STRING) +PARTITIONED BY (p1 STRING); + +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; + +SET hive.exec.dynamic.partition.mode=nonstrict; +explain INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1; + +INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1; + +SELECT * FROM t2; + +DROP TABLE t1; +DROP TABLE t2; diff --git 
a/ql/src/test/results/clientpositive/set_file_permission_in_fs.q.out b/ql/src/test/results/clientpositive/set_file_permission_in_fs.q.out new file mode 100644 index 0000000..4c5d2a5 --- /dev/null +++ b/ql/src/test/results/clientpositive/set_file_permission_in_fs.q.out @@ -0,0 +1,154 @@ +PREHOOK: query: CREATE TABLE t1 (c1 BIGINT, c2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (c1 BIGINT, c2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE t2 (c1 INT, c2 STRING) +PARTITIONED BY (p1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (c1 INT, c2 STRING) +PARTITIONED BY (p1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: explain INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1 +PREHOOK: type: QUERY +POSTHOOK: query: explain INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: bigint), c2 (type: string), c1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-0 + Move Operator + tables: + partition: + p1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 
+POSTHOOK: Output: default@t2@p1=1 +POSTHOOK: Output: default@t2@p1=2 +POSTHOOK: Output: default@t2@p1=3 +POSTHOOK: Output: default@t2@p1=4 +POSTHOOK: Output: default@t2@p1=5 +POSTHOOK: Lineage: t2 PARTITION(p1=1).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=1).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=2).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=2).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=3).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=3).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=4).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=4).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=5).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=5).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@p1=1 +PREHOOK: Input: default@t2@p1=2 +PREHOOK: Input: default@t2@p1=3 +PREHOOK: Input: default@t2@p1=4 +PREHOOK: Input: default@t2@p1=5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@p1=1 +POSTHOOK: Input: default@t2@p1=2 +POSTHOOK: Input: default@t2@p1=3 +POSTHOOK: Input: default@t2@p1=4 +POSTHOOK: Input: default@t2@p1=5 +#### A masked pattern was here #### +1 one 1 +1 one 1 +2 two 2 +2 two 2 +3 three 3 +3 three 3 +4 four 4 +4 four 4 +5 five 5 +5 five 5 +PREHOOK: query: DROP TABLE t1 +PREHOOK: type: 
DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: DROP TABLE t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: DROP TABLE t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2