diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 375ceeac48..440d761f03 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4453,9 +4453,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Whether LLAP daemon web UI should use SSL.", "llap.daemon.service.ssl"), LLAP_CLIENT_CONSISTENT_SPLITS("hive.llap.client.consistent.splits", true, "Whether to setup split locations to match nodes on which llap daemons are running, " + - "preferring one of the locations provided by the split itself. If there is no llap daemon " + - "running on any of those locations (or on the cloud), fall back to a cache affinity to" + - " an LLAP node. This is effective only if hive.execution.mode is llap."), + "instead of using the locations provided by the split itself. If there is no llap daemon " + + "running, fall back to locations provided by the split. This is effective only if " + + "hive.execution.mode is llap"), LLAP_VALIDATE_ACLS("hive.llap.validate.acls", true, "Whether LLAP should reject permissive ACLs in some cases (e.g. its own management\n" + "protocol or ZK paths), similar to how ssh refuses a key with bad access permissions."), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java index 5224429e9e..c5d96e54eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java @@ -15,11 +15,11 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; import java.util.List; -import java.util.Set; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.hash.Hashing; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.FileSplit; @@ -29,10 +29,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.hash.Hashing; - /** * This maps a split (path + offset) to an index based on the number of locations provided. * @@ -51,20 +47,18 @@ private final boolean isDebugEnabled = LOG.isDebugEnabled(); private final List locations; - private final Set locationSet; public HostAffinitySplitLocationProvider(List knownLocations) { Preconditions.checkState(knownLocations != null && !knownLocations.isEmpty(), HostAffinitySplitLocationProvider.class.getName() + " needs at least 1 location to function"); this.locations = knownLocations; - this.locationSet = new HashSet(knownLocations); } @Override public String[] getLocations(InputSplit split) throws IOException { if (!(split instanceof FileSplit)) { - if (isDebugEnabled) { + if (LOG.isDebugEnabled()) { LOG.debug("Split: " + split + " is not a FileSplit. Using default locations"); } return split.getLocations(); @@ -72,39 +66,14 @@ public HostAffinitySplitLocationProvider(List knownLocations) { FileSplit fsplit = (FileSplit) split; String splitDesc = "Split at " + fsplit.getPath() + " with offset= " + fsplit.getStart() + ", length=" + fsplit.getLength(); - List preferredLocations = preferLocations(fsplit); - String location = - preferredLocations.get(determineLocation(preferredLocations, fsplit.getPath().toString(), - fsplit.getStart(), splitDesc)); + String location = locations.get(determineLocation( + locations, fsplit.getPath().toString(), fsplit.getStart(), splitDesc)); return (location != null) ? new String[] { location } : null; } - private List preferLocations(FileSplit fsplit) throws IOException { - if (fsplit.getLocations() == null || fsplit.getLocations().length <= 0) { - // Cloud FS - return this.locations; - } - String[] datanodes = fsplit.getLocations(); - Arrays.sort(datanodes); - ArrayList targets = new ArrayList(datanodes.length); - for (String location : datanodes) { - if (locationSet.contains(location)) { - targets.add(location); - } - } - if (targets.size() > 0) { - return targets; - } - return this.locations; - } - @VisibleForTesting public static int determineLocation( List locations, String path, long start, String desc) { - if (locations.size() == 1) { - // skip everything, this is simple - return 0; - } byte[] bytes = getHashInputForSplit(path, start); long hash1 = hash1(bytes); int index = Hashing.consistentHash(hash1, locations.size()); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestHostAffinitySplitLocationProvider.java ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestHostAffinitySplitLocationProvider.java index f37a10cf2d..13f4676bdd 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestHostAffinitySplitLocationProvider.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestHostAffinitySplitLocationProvider.java @@ -299,48 +299,6 @@ public void testOrcSplitsLocationAffinity() throws IOException { assertArrayEquals(retLoc13, retLoc132); } - @Test (timeout = 90000000) - public void testDFSLocalityAwareAffinity() throws IOException { - List someLocations = locations.subList(0, 2); // 0,1 locations - HostAffinitySplitLocationProvider locationProvider = new HostAffinitySplitLocationProvider(someLocations); - - // Different base localities - InputSplit os1 = createMockFileSplit(true, "path1", 0, 15000, new String[] {locations.get(0), locations.get(1)}); // 0 or 1 - InputSplit os2 = createMockFileSplit(true, "path2", 0, 30000, new String[] {locations.get(2), locations.get(3)}); // 0 or 1 - InputSplit os3 = createMockFileSplit(true, "path3", 15000, 30000, new String[] {locations.get(0), locations.get(2)}); // 0 - InputSplit os4 = createMockFileSplit(true, "path4", 15000, 30000, new String[] {locations.get(1), locations.get(2)}); // 1 - - String[] retLoc1 = locationProvider.getLocations(os1); - String[] retLoc2 = locationProvider.getLocations(os2); - String[] retLoc3 = locationProvider.getLocations(os3); - String[] retLoc4 = locationProvider.getLocations(os4); - - assertEquals(1, retLoc1.length); - assertTrue(someLocations.contains(retLoc1[0])); - - assertEquals(1, retLoc2.length); - assertTrue(someLocations.contains(retLoc2[0])); - - assertEquals(1, retLoc3.length); - assertTrue(someLocations.contains(retLoc3[0])); - assertEquals(someLocations.get(0), retLoc3[0]); // is always 0 - - assertEquals(1, retLoc4.length); - assertTrue(someLocations.contains(retLoc4[0])); - assertEquals(someLocations.get(1), retLoc4[0]); // is always 1 - - String[] againLoc1 = locationProvider.getLocations(os1); - String[] againLoc2 = locationProvider.getLocations(os2); - String[] againLoc3 = locationProvider.getLocations(os3); - String[] againLoc4 = locationProvider.getLocations(os4); - - assertArrayEquals(retLoc1, againLoc1); - assertArrayEquals(retLoc2, againLoc2); - assertArrayEquals(retLoc3, againLoc3); - assertArrayEquals(retLoc4, againLoc4); - } - - private InputSplit createMockInputSplit(String[] locations) throws IOException { InputSplit inputSplit = mock(InputSplit.class);