diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index ae8716e..b267f4f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -342,7 +342,7 @@ private void addTimelineDelegationToken( } credentials.addToken(timelineService, timelineDelegationToken); if (LOG.isDebugEnabled()) { - LOG.debug("Add timline delegation token into credentials: " + LOG.debug("Add timeline delegation token to credentials: " + timelineDelegationToken); } DataOutputBuffer dob = new DataOutputBuffer(); @@ -358,8 +358,10 @@ private void addTimelineDelegationToken( return timelineClient.getDelegationToken(timelineDTRenewer); } catch (Exception e ) { if (timelineServiceBestEffort) { - LOG.warn("Failed to get delegation token from the timeline server: " + LOG.warn("Failed to get delegation token from the timeline server; " + + " timeline client no longer publishing data: " + e.getMessage()); + LOG.debug("Full exception details", e); return null; } throw e; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java index 4a5a443..10a4331 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; +import 
java.io.InterruptedIOException; import java.lang.reflect.UndeclaredThrowableException; import java.net.ConnectException; import java.net.HttpURLConnection; @@ -102,6 +103,10 @@ private static final String RESOURCE_URI_STR_V2 = "/ws/v2/timeline/"; private static final Joiner JOINER = Joiner.on(""); public final static int DEFAULT_SOCKET_TIMEOUT = 1 * 60 * 1000; // 1 minute + public static final String ERROR_NO_ATS_RESPONSE + = "Failed to get the response from the timeline server"; + public static final String ERROR_RETRIES_EXCEEDED + = "Failed to connect to timeline server"; private static Options opts; private static final String ENTITY_DATA_TYPE = "entity"; @@ -203,11 +208,13 @@ public Object retryOn(TimelineClientRetryOp op) retried = false; // keep trying + Exception lastException; while (true) { try { // try perform the op, if fail, keep retrying return op.run(); } catch (IOException | RuntimeException e) { + lastException = e; // break if there's no retries left if (leftRetries == 0) { break; @@ -227,23 +234,32 @@ public Object retryOn(TimelineClientRetryOp op) Thread.sleep(retryInterval); } catch (InterruptedException ie) { LOG.warn("Client retry sleep interrupted! "); + throw (InterruptedIOException) + (new InterruptedIOException(ie.toString()).initCause(ie)); } } - throw new RuntimeException("Failed to connect to timeline server. " - + "Connection retries limit exceeded. " - + "The posted timeline event may be missing"); - }; + // reached only if the retry count has been exceeded. + // therefore, lastException is non-null + String message = ERROR_RETRIES_EXCEEDED + + ". Connection retries limit (" + maxRetries + ") exceeded. 
" + + "The posted timeline event may be missing : " + lastException; + LOG.warn(message, lastException); + + throw new RuntimeException(message, lastException); + } private void logException(Exception e, int leftRetries) { if (leftRetries > 0) { LOG.info("Exception caught by TimelineClientConnectionRetry," + " will try " + leftRetries + " more time(s).\nMessage: " - + e.getMessage()); + + e); + LOG.debug("Failure", e); } else { // note that maxRetries may be -1 at the very beginning LOG.info("ConnectionException caught by TimelineClientConnectionRetry," + " will keep retrying.\nMessage: " - + e.getMessage()); + + e); + LOG.debug("Failure", e); } } } @@ -270,8 +286,9 @@ public boolean shouldRetryOn(Exception e) { try { return (ClientResponse) connectionRetry.retryOn(jerseyRetryOp); } catch (IOException e) { - throw new ClientHandlerException("Jersey retry failed!\nMessage: " - + e.getMessage()); + URI uri = constructResURI(getConfig(), timelineServiceAddress, true); + throw new ClientHandlerException("Jersey retry failed against " + uri + + "\nException: " + e, e); } } } @@ -899,7 +916,8 @@ public Object run() throws IOException { } catch (UndeclaredThrowableException e) { throw new IOException(e.getCause()); } catch (InterruptedException e) { - throw new IOException(e); + throw (InterruptedIOException) + (new InterruptedIOException(e.toString()).initCause(e)); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineWriter.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineWriter.java index 8fd0990..741f627 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineWriter.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineWriter.java @@ -123,22 +123,23 @@ public ClientResponse run() throws Exception 
{ throw new IOException(cause); } } catch (InterruptedException ie) { - throw (IOException)new InterruptedIOException().initCause(ie); + throw (InterruptedIOException) + (new InterruptedIOException(ie.toString()).initCause(ie)); } if (resp == null || resp.getStatusInfo().getStatusCode() != ClientResponse.Status.OK.getStatusCode()) { - String msg = - "Failed to get the response from the timeline server."; - LOG.error(msg); + String msg = TimelineClientImpl.ERROR_NO_ATS_RESPONSE +" at " + resURI; if (resp != null) { - msg += " HTTP error code: " + resp.getStatus(); + int status = resp.getStatus(); + msg += " -status code=" + status; if (LOG.isDebugEnabled()) { String output = resp.getEntity(String.class); - LOG.debug("HTTP error code: " + resp.getStatus() - + " Server response : \n" + output); + LOG.debug("HTTP error code: " + status + + " Server response : \n" + output); } } + LOG.error(msg); throw new YarnException(msg); } return resp; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java index ddf6c32..ad12639 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -37,6 +37,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; @@ -78,30 +79,22 @@ 
public void tearDown() { public void testPostEntities() throws Exception { mockEntityClientResponse(spyTimelineWriter, ClientResponse.Status.OK, false, false); - try { - TimelinePutResponse response = client.putEntities(generateEntity()); - Assert.assertEquals(0, response.getErrors().size()); - } catch (YarnException e) { - Assert.fail("Exception is not expected"); - } + TimelinePutResponse response = client.putEntities(generateEntity()); + Assert.assertEquals(0, response.getErrors().size()); } @Test public void testPostEntitiesWithError() throws Exception { mockEntityClientResponse(spyTimelineWriter, ClientResponse.Status.OK, true, false); - try { - TimelinePutResponse response = client.putEntities(generateEntity()); - Assert.assertEquals(1, response.getErrors().size()); - Assert.assertEquals("test entity id", response.getErrors().get(0) - .getEntityId()); - Assert.assertEquals("test entity type", response.getErrors().get(0) - .getEntityType()); - Assert.assertEquals(TimelinePutResponse.TimelinePutError.IO_EXCEPTION, - response.getErrors().get(0).getErrorCode()); - } catch (YarnException e) { - Assert.fail("Exception is not expected"); - } + TimelinePutResponse response = client.putEntities(generateEntity()); + Assert.assertEquals(1, response.getErrors().size()); + Assert.assertEquals("test entity id", response.getErrors().get(0) + .getEntityId()); + Assert.assertEquals("test entity type", response.getErrors().get(0) + .getEntityType()); + Assert.assertEquals(TimelinePutResponse.TimelinePutError.IO_EXCEPTION, + response.getErrors().get(0).getErrorCode()); } @Test @@ -121,8 +114,7 @@ public void testPostEntitiesNoResponse() throws Exception { client.putEntities(generateEntity()); Assert.fail("Exception is expected"); } catch (YarnException e) { - Assert.assertTrue(e.getMessage().contains( - "Failed to get the response from the timeline server.")); + assertExceptionTextContains(e, TimelineClientImpl.ERROR_NO_ATS_RESPONSE); } } @@ -131,7 +123,7 @@ public void 
testPostEntitiesConnectionRefused() throws Exception { mockEntityClientResponse(spyTimelineWriter, null, false, true); try { client.putEntities(generateEntity()); - Assert.fail("RuntimeException is expected"); + Assert.fail("Exception is expected"); } catch (RuntimeException re) { Assert.assertTrue(re instanceof ClientHandlerException); } @@ -140,11 +132,7 @@ public void testPostEntitiesConnectionRefused() throws Exception { @Test public void testPutDomain() throws Exception { mockDomainClientResponse(spyTimelineWriter, ClientResponse.Status.OK, false); - try { - client.putDomain(generateDomain()); - } catch (YarnException e) { - Assert.fail("Exception is not expected"); - } + client.putDomain(generateDomain()); } @Test @@ -155,8 +143,13 @@ public void testPutDomainNoResponse() throws Exception { client.putDomain(generateDomain()); Assert.fail("Exception is expected"); } catch (YarnException e) { - Assert.assertTrue(e.getMessage().contains( - "Failed to get the response from the timeline server.")); + assertExceptionTextContains(e, TimelineClientImpl.ERROR_NO_ATS_RESPONSE); + } + } + + private void assertExceptionTextContains(Exception e, String text) { + if (!e.toString().contains(text)) { + throw new AssertionError("Did not find \"" + text + "\" in " + e, e); } } @@ -165,9 +158,9 @@ public void testPutDomainConnectionRefused() throws Exception { mockDomainClientResponse(spyTimelineWriter, null, true); try { client.putDomain(generateDomain()); - Assert.fail("RuntimeException is expected"); + Assert.fail("Exception is expected"); } catch (RuntimeException re) { - Assert.assertTrue(re instanceof ClientHandlerException); + // expected } } @@ -179,44 +172,40 @@ public void testCheckRetryCount() throws Exception { conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, -2); createTimelineClient(conf); - Assert.fail(); + Assert.fail("IllegalArgumentException is expected"); } catch(IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains( - 
YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES)); + assertExceptionTextContains(e, + YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES); } try { YarnConfiguration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, - 0); + 0); createTimelineClient(conf); - Assert.fail(); + Assert.fail("Exception is expected"); } catch(IllegalArgumentException e) { - Assert.assertTrue(e.getMessage().contains( - YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS)); + assertExceptionTextContains(e, + YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS); } int newMaxRetries = 5; long newIntervalMs = 500; YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, - newMaxRetries); + newMaxRetries); conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, - newIntervalMs); + newIntervalMs); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); - TimelineClientImpl client = createTimelineClient(conf); + ServiceOperations.stop(client); + client = createTimelineClient(conf); try { // This call should fail because there is no timeline server client.putEntities(generateEntity()); Assert.fail("Exception expected! " + "Timeline server should be off to run this test. "); } catch (RuntimeException ce) { - Assert.assertTrue( - "Handler exception for reason other than retry: " + ce.getMessage(), - ce.getMessage().contains("Connection retries limit exceeded")); - // we would expect this exception here, check if the client has retried - Assert.assertTrue("Retry filter didn't perform any retries! 
", client - .connectionRetry.getRetired()); + assertRetryException(client, ce); } } @@ -226,20 +215,21 @@ public void testDelegationTokenOperationsRetry() throws Exception { long newIntervalMs = 500; YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, - newMaxRetries); + newMaxRetries); conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, - newIntervalMs); + newIntervalMs); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); // use kerberos to bypass the issue in HADOOP-11215 conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); UserGroupInformation.setConfiguration(conf); - TimelineClientImpl client = createTimelineClient(conf); + ServiceOperations.stop(client); + client = createTimelineClient(conf); TimelineClientImpl clientFake = createTimelineClientFakeTimelineClientRetryOp(conf); - TestTimlineDelegationTokenSecretManager dtManager = - new TestTimlineDelegationTokenSecretManager(); + TestTimelineDelegationTokenSecretManager dtManager = + new TestTimelineDelegationTokenSecretManager(); try { dtManager.startThreads(); Thread.sleep(3000); @@ -250,7 +240,7 @@ public void testDelegationTokenOperationsRetry() throws Exception { UserGroupInformation.getCurrentUser().getShortUserName()); assertFail(); } catch (RuntimeException ce) { - assertException(client, ce); + assertRetryException(client, ce); } try { @@ -265,7 +255,7 @@ public void testDelegationTokenOperationsRetry() throws Exception { new Text("0.0.0.0:8188"))); assertFail(); } catch (RuntimeException ce) { - assertException(client, ce); + assertRetryException(client, ce); } try { @@ -280,7 +270,7 @@ public void testDelegationTokenOperationsRetry() throws Exception { new Text("0.0.0.0:8188"))); assertFail(); } catch (RuntimeException ce) { - assertException(client, ce); + assertRetryException(client, ce); } // Test DelegationTokenOperationsRetry on SocketTimeoutException @@ -295,7 
+285,7 @@ public void testDelegationTokenOperationsRetry() throws Exception { new Text("0.0.0.0:8188"))); assertFail(); } catch (RuntimeException ce) { - assertException(clientFake, ce); + assertRetryException(clientFake, ce); } } finally { client.stop(); @@ -309,13 +299,12 @@ private static void assertFail() { + "Timeline server should be off to run this test."); } - private void assertException(TimelineClientImpl client, RuntimeException ce) { - Assert.assertTrue( - "Handler exception for reason other than retry: " + ce.toString(), ce - .getMessage().contains("Connection retries limit exceeded")); + private void assertRetryException(TimelineClientImpl timelineClient, + Exception ce) { + assertExceptionTextContains(ce, TimelineClientImpl.ERROR_RETRIES_EXCEEDED); // we would expect this exception here, check if the client has retried Assert.assertTrue("Retry filter didn't perform any retries! ", - client.connectionRetry.getRetired()); + timelineClient.connectionRetry.getRetired()); } public static ClientResponse mockEntityClientResponse( @@ -434,10 +423,10 @@ private TimelineClientImpl createTimelineClientFakeTimelineClientRetryOp( return client; } - private static class TestTimlineDelegationTokenSecretManager extends + private static class TestTimelineDelegationTokenSecretManager extends AbstractDelegationTokenSecretManager { - public TestTimlineDelegationTokenSecretManager() { + public TestTimelineDelegationTokenSecretManager() { super(100000, 100000, 100000, 100000); }