commit f3350eb655eec53aaa18e9b6c1cc9ff02ab0be02 Author: Vihang Karajgaonkar Date: Tue Mar 27 15:38:01 2018 -0700 HIVE-19050 : DBNotificationListener does not catch exceptions in the cleaner thread diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index d64718159b4112bee2792e053837440334fb297e..fbef6d0d6acdcd85431e3309b5df03e5cf99186f 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -93,7 +93,8 @@ private Configuration conf; private MessageFactory msgFactory; - private synchronized void init(Configuration conf) throws MetaException { + //cleaner is a static object, use static synchronized to make sure its thread-safe + private static synchronized void init(Configuration conf) throws MetaException { if (cleaner == null) { cleaner = new CleanerThread(conf, RawStoreProxy.getProxy(conf, conf, @@ -105,7 +106,7 @@ private synchronized void init(Configuration conf) throws MetaException { public DbNotificationListener(Configuration config) throws MetaException { super(config); conf = config; - init(conf); + DbNotificationListener.init(conf); msgFactory = MessageFactory.getInstance(); } @@ -557,7 +558,7 @@ private void process(NotificationEvent event, ListenerEvent listenerEvent) throw static private long sleepTime = 60000; CleanerThread(Configuration conf, RawStore rs) { - super("CleanerThread"); + super("DB-Notification-Cleaner"); this.rs = rs; setTimeToLive(MetastoreConf.getTimeVar(conf, ConfVars.EVENT_DB_LISTENER_TTL, TimeUnit.SECONDS)); @@ -567,7 +568,17 @@ private void process(NotificationEvent event, ListenerEvent listenerEvent) throw @Override public void run() { while (true) { - rs.cleanNotificationEvents(ttl); + try { + rs.cleanNotificationEvents(ttl); + } catch (Exception ex) { + //catching exceptions here makes sure that the thread doesn't die in case of unexpected + //exceptions + LOG.warn( + "Exception received while cleaning notifications. More details can be found in debug mode" + + ex.getMessage()); + LOG.debug(ex.getMessage(), ex); + } + LOG.debug("Cleaner thread done"); try { Thread.sleep(sleepTime); diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index 5cc407ad68d0fab491cf516067ecdf79acaf4795..60571cca7a0c4b456b0c6f236c432cd76a269433 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -822,6 +822,10 @@ public void addNotificationEvent(NotificationEvent event) { @Override public void cleanNotificationEvents(int olderThan) { + if (!shouldEventSucceed) { + //throw exception to simulate an issue with cleaner thread + throw new RuntimeException("Dummy exception while cleaning notifications"); + } objectStore.cleanNotificationEvents(olderThan); } diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java index e0e29652da94bbdaca515a17955d1409824c1742..5ef047b9c558774bc5ed6164cb19b805000addde 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/TestDbNotificationListener.java @@ -116,7 +116,7 @@ private long firstEventId; private static List testsToSkipForReplV1BackwardCompatTesting = - new ArrayList<>(Arrays.asList("cleanupNotifs", "sqlTempTable")); + new ArrayList<>(Arrays.asList("cleanupNotifs", "cleanupNotificationWithError", "sqlTempTable")); // Make sure we skip backward-compat checking for those tests that don't generate events private static ReplicationV1CompatRule bcompat = null; @@ -1355,4 +1355,32 @@ public void cleanupNotifs() throws Exception { LOG.info("second trigger done"); assertEquals(0, rsp2.getEventsSize()); } + + @Test + public void cleanupNotificationWithError() throws Exception { + Database db = new Database("cleanup1", "no description", "file:/tmp", emptyParameters); + msClient.createDatabase(db); + msClient.dropDatabase("cleanup1"); + + LOG.info("Pulling events immediately after createDatabase/dropDatabase"); + NotificationEventResponse rsp = msClient.getNextNotification(firstEventId, 0, null); + assertEquals(2, rsp.getEventsSize()); + //this simulates that cleaning thread will error out while cleaning the notifications + DummyRawStoreFailEvent.setEventSucceed(false); + // sleep for expiry time, and then fetch again + // sleep twice the TTL interval - things should have been cleaned by then. + Thread.sleep(EVENTS_TTL * 2 * 1000); + + LOG.info("Pulling events again after failing to cleanup"); + NotificationEventResponse rsp2 = msClient.getNextNotification(firstEventId, 0, null); + LOG.info("second trigger done"); + assertEquals(2, rsp2.getEventsSize()); + DummyRawStoreFailEvent.setEventSucceed(true); + Thread.sleep(EVENTS_TTL * 2 * 1000); + + LOG.info("Pulling events again after cleanup"); + rsp2 = msClient.getNextNotification(firstEventId, 0, null); + LOG.info("third trigger done"); + assertEquals(0, rsp2.getEventsSize()); + } }