diff --git ql/pom.xml ql/pom.xml
index d73deba440..2d3a0c5468 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -770,6 +770,12 @@
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
+ <dependency>
+   <groupId>com.google.guava</groupId>
+   <artifactId>guava-testlib</artifactId>
+   <version>${guava.version}</version>
+   <scope>test</scope>
+ </dependency>
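
The guava-testlib dependency is presumably added so the accompanying tests can control time when exercising the 30-second cache TTL introduced below. As a rough sketch of the kind of test it enables (the test class and its setup are assumptions, not part of this patch), guava-testlib's FakeTicker lets a TTL cache be verified without sleeping:

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.testing.FakeTicker;
import java.util.concurrent.TimeUnit;
import org.junit.Assert;
import org.junit.Test;

public class TestTtlExpiry {                       // hypothetical test class
  @Test
  public void entryExpiresAfterTtl() {
    FakeTicker ticker = new FakeTicker();
    Cache<String, String> cache = CacheBuilder.newBuilder()
        .expireAfterWrite(30, TimeUnit.SECONDS)    // same TTL the patch describes
        .ticker(ticker)                            // virtual clock from guava-testlib
        .build();
    cache.put("k", "v");
    Assert.assertEquals("v", cache.getIfPresent("k"));
    ticker.advance(31, TimeUnit.SECONDS);          // jump past the TTL, no sleeping
    Assert.assertNull(cache.getIfPresent("k"));
  }
}
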
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index da1dd426c9..b53414ee64 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -18,16 +18,12 @@
package org.apache.hadoop.hive.ql.exec;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.Future;
-import java.util.concurrent.locks.ReentrantLock;
-
+import com.esotericsoftware.kryo.KryoException;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.Constants;
@@ -36,24 +32,25 @@
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
+import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition;
import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper;
-import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
-import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess;
import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer;
+import org.apache.hadoop.hive.ql.exec.spark.SmallTableCache;
import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities;
import org.apache.hadoop.hive.ql.exec.tez.LlapObjectCache;
import org.apache.hadoop.hive.ql.exec.tez.LlapObjectSubCache;
@@ -71,8 +68,8 @@
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
@@ -80,9 +77,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
-import com.esotericsoftware.kryo.KryoException;
-import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Future;
+import java.util.concurrent.locks.ReentrantLock;
/**
* Map side Join operator implementation.
@@ -738,6 +738,21 @@ protected void generateFullOuterSmallTableNoMatches(byte smallTablePos,
@Override
public void closeOp(boolean abort) throws HiveException {
+ // Call SmallTableCache.cache() so that when a task finishes, the small table is kept around
+ // for at least 30 seconds, giving any task scheduled in the near future a chance to re-use it.
+ if (HiveConf.getVar(hconf, ConfVars.HIVE_EXECUTION_ENGINE).equals("spark") &&
+ SparkUtilities.isDedicatedCluster(hconf)) {
+
+ for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+ if (pos != conf.getPosBigTable()) {
+ MapJoinTableContainer container = mapJoinTables[pos];
+ if (container != null && container.getFolder() != null) {
+ SmallTableCache.cache(container.getFolder(), container);
+ }
+ }
+ }
+ }
+
if (isFullOuterMapJoin) {
// FULL OUTER MapJoin: After matching the Big Table row keys against the Small Table, we now
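
For context, this closeOp() change only hands the container to SmallTableCache.cache(), keyed by the container's spill folder; the eviction policy lives in SmallTableCache itself. A minimal sketch of a cache with the semantics the comment describes (expiry 30 seconds after the last write, clearing evicted containers) might look like the following, assuming Guava's CacheBuilder; the actual SmallTableCache implementation may differ:

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalNotification;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;

public final class TtlSmallTableCacheSketch {      // illustrative stand-in, not SmallTableCache
  private static final Cache<Path, MapJoinTableContainer> CACHE =
      CacheBuilder.newBuilder()
          .expireAfterWrite(30, TimeUnit.SECONDS)  // linger long enough for follow-up tasks
          .removalListener(
              (RemovalNotification<Path, MapJoinTableContainer> n) -> n.getValue().clear())
          .build();

  private TtlSmallTableCacheSketch() {
  }

  public static void cache(Path folder, MapJoinTableContainer container) {
    CACHE.put(folder, container);                  // keyed by the small table's spill folder
  }

  public static MapJoinTableContainer get(Path folder) {
    return CACHE.getIfPresent(folder);             // null means the table must be re-loaded
  }
}
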
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinTableContainer.java
index 9e65fd98d6..6e75ff4b41 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinTableContainer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinTableContainer.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.persistence;
+import org.apache.hadoop.fs.Path;
+
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -27,6 +29,7 @@
protected static final String THESHOLD_NAME = "threshold";
protected static final String LOAD_NAME = "load";
+ protected Path folder;
/** Creates metadata for implementation classes' ctors from threshold and load factor. */
protected static Map<String, String> createConstructorMetaData(int threshold, float loadFactor) {
@@ -48,4 +51,14 @@ protected AbstractMapJoinTableContainer(Map<String, String> metaData) {
protected void putMetaData(String key, String value) {
metaData.put(key, value);
}
+
+ @Override
+ public void setFolder(Path folder) {
+ this.folder = folder;
+ }
+
+ @Override
+ public Path getFolder() {
+ return folder;
+ }
}
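
Putting the two pieces together: setFolder() is expected to be called when the small table is loaded or spilled, and getFolder() then serves as the cache key in closeOp() above, with the null check skipping containers that never had a folder assigned. A self-contained illustration of the accessor pattern this hunk adds (the stand-in class and path are assumptions for the demo, not code from this patch):

import org.apache.hadoop.fs.Path;

public class FolderAccessorDemo {
  // Stand-in for AbstractMapJoinTableContainer: the base class owns the field,
  // so every concrete container inherits the getter/setter pair.
  abstract static class Container {
    private Path folder;

    public void setFolder(Path folder) {
      this.folder = folder;
    }

    public Path getFolder() {
      return folder;
    }
  }

  public static void main(String[] args) {
    Container c = new Container() { };
    System.out.println(c.getFolder());              // null -> skipped by the closeOp() guard
    c.setFolder(new Path("/tmp/hive/smalltable"));  // hypothetical spill location
    System.out.println(c.getFolder());              // now usable as a cache key
  }
}
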
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 54377428ea..d63a42c3e1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -121,6 +121,7 @@
private final List