diff --git a/data/files/identity_udf.jar b/data/files/identity_udf.jar new file mode 100644 index 0000000..8170995 Binary files /dev/null and b/data/files/identity_udf.jar differ diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index 5087f87..c470c80 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.lang.reflect.Method; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; @@ -39,16 +40,20 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.jdbc.miniHS2.MiniHS2; import org.junit.After; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; public class TestJdbcWithMiniHS2 { private static MiniHS2 miniHS2 = null; - private static Path dataFilePath; + private static String dataFileDir; + private static Path kvDataFilePath; private static final String tmpDir = System.getProperty("test.tmp.dir"); private Connection hs2Conn = null; @@ -59,9 +64,8 @@ public static void beforeTest() throws Exception { HiveConf conf = new HiveConf(); conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); miniHS2 = new MiniHS2(conf); - String dataFileDir = conf.get("test.data.files").replace('\\', '/') - .replace("c:", ""); - dataFilePath = new Path(dataFileDir, "kv1.txt"); + dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", ""); + kvDataFilePath = new Path(dataFileDir, "kv1.txt"); Map confOverlay = new HashMap(); miniHS2.start(confOverlay); } @@ -101,7 +105,7 @@ public void testConnection() throws Exception { // load data stmt.execute("load data local inpath '" - + dataFilePath.toString() + "' into table " + tableName); + + kvDataFilePath.toString() + "' into table " + tableName); ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName); assertTrue(res.next()); @@ -545,4 +549,61 @@ private void verifyScratchDir(HiveConf conf, FileSystem fs, Path scratchDirPath, fs.getFileStatus(scratchDirPath).getPermission()); } } + + /** + * Tests that Hadoop's ReflectionUtils.CONSTRUCTOR_CACHE clears cached class objects (& hence + * doesn't leak classloaders) on closing any session + * + * @throws Exception + */ + @Test + public void testAddJarConstructorUnCaching() throws Exception { + Path jarFilePath = new Path(dataFileDir, "identity_udf.jar"); + Connection conn = getConnection(miniHS2.getJdbcURL(), "foo", "bar"); + String tableName = "testAddJar"; + Statement stmt = conn.createStatement(); + stmt.execute("SET hive.support.concurrency = false"); + // Create table + stmt.execute("DROP TABLE IF EXISTS " + tableName); + stmt.execute("CREATE TABLE " + tableName + " (key INT, value STRING)"); + // Load data + stmt.execute("LOAD DATA LOCAL INPATH '" + kvDataFilePath.toString() + "' INTO TABLE " + + tableName); + ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName); + // Ensure table is populated + assertTrue(res.next()); + + int cacheBeforeClose; + int cacheAfterClose; + // Add the jar file + stmt.execute("ADD JAR " + jarFilePath.toString()); + // Create a temporary function using the jar + stmt.execute("CREATE TEMPORARY FUNCTION func AS 'IdentityStringUDF'"); + // Execute the UDF + stmt.execute("SELECT func(value) from " + tableName); + cacheBeforeClose = getReflectionUtilsCacheSize(); + System.out.println("Cache before connection close: " + cacheBeforeClose); + // Cache size should be > 0 now + Assert.assertTrue(cacheBeforeClose > 0); + conn.close(); + cacheAfterClose = getReflectionUtilsCacheSize(); + System.out.println("Cache after connection close: " + cacheAfterClose); + // Cache size should be 0 now + Assert.assertTrue("Failed: " + cacheAfterClose, cacheAfterClose == 0); + } + + // Call ReflectionUtils#getCacheSize (which is private) + private int getReflectionUtilsCacheSize() { + Method getCacheSizeMethod; + try { + getCacheSizeMethod = ReflectionUtils.class.getDeclaredMethod("getCacheSize"); + if (getCacheSizeMethod != null) { + getCacheSizeMethod.setAccessible(true); + return (Integer) getCacheSizeMethod.invoke(null); + } + } catch (Exception e) { + System.out.println(e); + } + return -1; + } } \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index c315985..70d35e6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.URI; import java.net.URLClassLoader; import java.util.*; @@ -1299,8 +1301,25 @@ public void close() throws IOException { sparkSession = null; } } - dropSessionPaths(conf); + // Hadoop's ReflectionUtils caches constructors for the classes it instantiated. + // In UDFs, this can result in classloaders not getting GCed for a temporary function, + // resulting in a PermGen leak when used extensively from HiveServer2 + clearReflectionUtilsCache(); + } + + private void clearReflectionUtilsCache() { + Method clearCacheMethod; + try { + clearCacheMethod = ReflectionUtils.class.getDeclaredMethod("clearCache"); + if (clearCacheMethod != null) { + clearCacheMethod.setAccessible(true); + clearCacheMethod.invoke(null); + LOG.debug("Cleared Hadoop ReflectionUtils CONSTRUCTOR_CACHE"); + } + } catch (Exception e) { + LOG.info(e); + } } public AuthorizationMode getAuthorizationMode(){