commit 3bd2ba2e1fdd754b2ee03d48bd064fb6fd5b843e
Author: Vihang Karajgaonkar
Date:   Mon Dec 5 11:26:35 2016 -0800

    HIVE-15355 : Concurrency issues during parallel moveFile due to HDFSUtils.setFullFileStatus

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/io/TestHadoopFileStatus.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/io/TestHadoopFileStatus.java
new file mode 100644
index 0000000000000000000000000000000000000000..b9fc09bea3e36c1dba63603d5daf5b56e643e570
--- /dev/null
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/io/TestHadoopFileStatus.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.io;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.AclEntryScope;
+import org.apache.hadoop.fs.permission.AclEntryType;
+import org.apache.hadoop.fs.permission.AclStatus;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.io.HdfsUtils.HadoopFileStatus;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+public class TestHadoopFileStatus {
+  private static HiveConf hiveConf;
+  private static HadoopFileStatus sourceStatus;
+
+  @BeforeClass
+  public static void setUp() throws IOException {
+    hiveConf = new HiveConf(TestHadoopFileStatus.class);
+    hiveConf.set("dfs.namenode.acls.enabled", "true");
+  }
+
+  private static AclEntry newAclEntry(AclEntryScope scope, AclEntryType type, FsAction permission) {
+    return new AclEntry.Builder().setScope(scope).setType(type).setPermission(permission).build();
+  }
+
+  /*
+   * HdfsUtils.setFullFileStatus(..) is called from multiple parallel threads. If AclEntries
+   * is modifiable, the method will not be thread-safe and could cause random concurrency issues.
+   * This test case checks that the aclEntries returned from HadoopFileStatus cannot be modified.
+   */
+  @Test(expected = UnsupportedOperationException.class)
+  public void testHadoopFileStatusAclEntries() throws IOException {
+    FileSystem mockDfs = Mockito.mock(DistributedFileSystem.class);
+    Path mockPath = Mockito.mock(Path.class);
+
+    List<AclEntry> aclEntries = Lists.newArrayList();
+    aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.USER, FsAction.ALL));
+    aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.GROUP, FsAction.READ_EXECUTE));
+    aclEntries.add(newAclEntry(AclEntryScope.ACCESS, AclEntryType.OTHER, FsAction.NONE));
+    AclStatus aclStatus = new AclStatus.Builder().owner("dummyOwner").group("dummyGroup")
+        .stickyBit(true).addEntries(aclEntries).build();
+
+    FileStatus mockFileStatus = Mockito.mock(FileStatus.class);
+    Mockito.when(mockDfs.getAclStatus(mockPath)).thenReturn(aclStatus);
+    Mockito.when(mockDfs.getFileStatus(mockPath)).thenReturn(mockFileStatus);
+    sourceStatus = new HadoopFileStatus(hiveConf, mockDfs, mockPath);
+    Assert.assertNotNull(sourceStatus.getAclEntries());
+    Assert.assertTrue(sourceStatus.getAclEntries().size() == 3);
+    Iterables.removeIf(sourceStatus.getAclEntries(), new Predicate<AclEntry>() {
+      @Override
+      public boolean apply(AclEntry input) {
+        if (input.getName() == null) {
+          return true;
+        }
+        return false;
+      }
+    });
+  }
+
+}
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java
index 70a6857464a38d9a425511b78b54d4231f131f1f..7b6a9bd68af47b933e65624c0239c7d91a400120 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/io/HdfsUtils.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.io;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.commons.lang.ArrayUtils;
@@ -68,12 +70,10 @@ public static void setFullFileStatus(Configuration conf, HdfsUtils.HadoopFileSta
     boolean aclEnabled = Objects.equal(conf.get("dfs.namenode.acls.enabled"), "true");
     FsPermission sourcePerm = fStatus.getPermission();
     List<AclEntry> aclEntries = null;
-    AclStatus aclStatus;
     if (aclEnabled) {
-      aclStatus = sourceStatus.getAclStatus();
-      if (aclStatus != null) {
-        LOG.trace(aclStatus.toString());
-        aclEntries = aclStatus.getEntries();
+      if (sourceStatus.getAclEntries() != null) {
+        LOG.trace(sourceStatus.aclStatus.toString());
+        aclEntries = new ArrayList<>(sourceStatus.getAclEntries());
         removeBaseAclEntries(aclEntries);
 
         //the ACL api's also expect the tradition user/group/other permission in the form of ACL
@@ -193,8 +193,9 @@ public HadoopFileStatus(Configuration conf, FileSystem fs, Path file) throws IOE
     public FileStatus getFileStatus() {
       return fileStatus;
     }
-    public AclStatus getAclStatus() {
-      return aclStatus;
+
+    public List<AclEntry> getAclEntries() {
+      return aclStatus == null ? null : Collections.unmodifiableList(aclStatus.getEntries());
     }
   }
 }
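
The sketch below is not part of the commit; it is a minimal, self-contained illustration of the pattern the patch relies on, assuming only the JDK collections API (the class name and the string "ACL entries" stand in for the real AclEntry objects). HadoopFileStatus.getAclEntries() now hands out an unmodifiable view of the ACL entries, so any in-place mutation fails fast with UnsupportedOperationException (which is what the new test expects), while setFullFileStatus() copies that view into a fresh ArrayList before calling removeBaseAclEntries(), so each parallel moveFile thread mutates only its own private copy.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Hypothetical stand-alone demo, not Hive code.
public class UnmodifiableAclSketch {
  public static void main(String[] args) {
    // Shared state, analogous to the entries held inside HadoopFileStatus.
    List<String> sharedEntries =
        new ArrayList<>(Arrays.asList("user::rwx", "group::r-x", "other::---"));

    // What getAclEntries() now returns: a read-only view of the shared list.
    List<String> readOnlyView = Collections.unmodifiableList(sharedEntries);

    // What setFullFileStatus() now does before mutating: take a private copy.
    List<String> privateCopy = new ArrayList<>(readOnlyView);
    privateCopy.removeIf(e -> e.startsWith("other"));  // safe: only this thread's copy changes
    System.out.println("private copy after removal: " + privateCopy);
    System.out.println("shared entries untouched:   " + sharedEntries);

    // Mutating the view itself fails fast, which is what the new unit test asserts.
    try {
      readOnlyView.remove(0);
    } catch (UnsupportedOperationException e) {
      System.out.println("read-only view rejected the mutation");
    }
  }
}

Returning an unmodifiable view instead of the backing list turns a silent cross-thread data race into an immediate, testable failure; the per-thread ArrayList copy is what makes the subsequent removeBaseAclEntries() mutation safe.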