diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 1e7fce6..152aff0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1317,6 +1317,23 @@ public static final boolean TIMELINE_SERVICE_HTTP_CROSS_ORIGIN_ENABLED_DEFAULT = false; + /** Timeline client settings */ + public static final String TIMELINE_SERVICE_CLIENT_PREFIX = + TIMELINE_SERVICE_PREFIX + "client."; + + /** Timeline client RESTful call, max retries */ + public static final String TIMELINE_SERVICE_CLIENT_MAX_RETRIES = + TIMELINE_SERVICE_CLIENT_PREFIX + "max-retries"; + + public static final int DEFAULT_TIMELINE_SERVICE_CLIENT_MAX_RETRIES = 30; + + /** Timeline client RESTful call, retry interval */ + public static final String TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS = + TIMELINE_SERVICE_CLIENT_PREFIX + "retry-interval-ms"; + + public static final long + DEFAULT_TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS = 1000; + // /////////////////////////////// // Shared Cache Configs // /////////////////////////////// diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java index 27987cb..b77dfee 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java @@ -102,4 +102,21 @@ public abstract void putDomain( public abstract Token getDelegationToken( String renewer) throws IOException, YarnException; + /** + *

+ * Customize the retry behavior of the timeline client. One may set the + * maximum number of retries if the connection to timeline server fails and + * the time interval (in milliseconds) between consecutive retries. If this + * method is not called, the client will use the default maxRetry and time + * interval from the yarn config file. Note that both settings may be + * concurrently accessed when this method is called. + *

+ * + * @param maxRetries + * @param retryIntervalMs + */ + @Public + public abstract void customizeRetrySettings(int maxRetries, + long retryIntervalMs); + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java index fbddd14..872795f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; +import java.net.ConnectException; import java.net.HttpURLConnection; import java.net.URI; import java.net.URL; @@ -68,7 +69,10 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.sun.jersey.api.client.Client; +import com.sun.jersey.api.client.filter.ClientFilter; import com.sun.jersey.api.client.ClientResponse; +import com.sun.jersey.api.client.ClientRequest; +import com.sun.jersey.api.client.ClientHandlerException; import com.sun.jersey.api.client.WebResource; import com.sun.jersey.api.client.config.ClientConfig; import com.sun.jersey.api.client.config.DefaultClientConfig; @@ -102,6 +106,90 @@ private URI resURI; private boolean isEnabled; private KerberosAuthenticatedURLConnectionFactory urlFactory; + private TimelineJerseyRetryFilter retryFilter; + + class TimelineJerseyRetryFilter extends ClientFilter { + // maxRetries < 0 means keep trying + private int maxRetries; + private long retryInterval; + private boolean retried = false; + + // Customize retry settings synchronously + public void changeRetrySettings(int maxRetries, long interval) { + synchronized(this) { + this.maxRetries = maxRetries; + this.retryInterval = interval; + } + } + + // Constructor with default retry settings + public TimelineJerseyRetryFilter(Configuration conf) { + super(); + maxRetries = conf.getInt( + YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES, + YarnConfiguration.DEFAULT_TIMELINE_SERVICE_CLIENT_MAX_RETRIES); + retryInterval = conf.getLong( + YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS); + } + + @Override + public ClientResponse handle(ClientRequest cr) + throws ClientHandlerException { + // synchronously get a snapshot of current retry settings + int leftRetries = 0; + long sleepMs; + synchronized(this) { + leftRetries = maxRetries; + sleepMs = retryInterval; + } + // retry until we reached the limit + while (leftRetries != 0) { + try { + // try pass the request on, if fail, keep retrying + return getNext().handle(cr); + } catch (ClientHandlerException e) { + if(e.getCause() instanceof ConnectException) { + retried = true; + if (leftRetries > 0) { + LOG.info("Connection Timeout (" + cr.getURI() + "), will try " + + leftRetries + " more time(s)."); + } else { + // note that maxRetries may be -1 at the very beginning + LOG.info("Connection Timeout (" + cr.getURI() + + "), will keep retrying."); + } + } else { + throw e; + } + } + if (leftRetries > 0) { + leftRetries--; + } + try { + // sleep for the given time interval + Thread.sleep(sleepMs); + } catch (InterruptedException ie) { + LOG.warn("Client retry sleep interrupted! "); + } + } + throw new ClientHandlerException("Failed to connect to timeline server. " + + "Connection retries limit exceeded. " + + "The posted timeline event may be missing"); + }; + + public int getMaxRetries() { + return maxRetries; + } + + public long getRetryIntervalMs() { + return retryInterval; + } + + public boolean retried() { + return retried; + } + } public TimelineClientImpl() { super(TimelineClientImpl.class.getName()); @@ -125,6 +213,8 @@ protected void serviceInit(Configuration conf) throws Exception { client = new Client(new URLConnectionClientHandler( new PseudoAuthenticatedURLConnectionFactory(connConfigurator)), cc); } + retryFilter = new TimelineJerseyRetryFilter(conf); + client.addFilter(retryFilter); if (YarnConfiguration.useHttps(conf)) { resURI = URI .create(JOINER.join("https://", conf.get( @@ -206,6 +296,18 @@ private ClientResponse doPosting(Object obj, String path) throws IOException, Ya urlFactory.token, renewer); } + @Override + public void customizeRetrySettings(int maxRetries, + long retryIntervalMs) { + retryFilter.changeRetrySettings(maxRetries, retryIntervalMs); + } + + @Private + @VisibleForTesting + public TimelineJerseyRetryFilter getRetryFilter() { + return retryFilter; + } + @Private @VisibleForTesting public ClientResponse doPostingObject(Object object, String path) { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java index 1301556..5a3bbb3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -181,6 +181,37 @@ public void testPutDomainConnectionRefused() throws Exception { } } + @Test + public void testChangeRetrySettings() throws Exception { + Assert.assertEquals("Default max retry is not equal to system default", 30, + client.getRetryFilter().getMaxRetries()); + Assert.assertEquals("Default retry interval is not equal to system default", + 1000, client.getRetryFilter().getRetryIntervalMs()); + int newMaxRetries = 60; + long newIntervalMs = 1500; + client.customizeRetrySettings(newMaxRetries, newIntervalMs); + Assert.assertEquals("Max retry is not equal to expect value", newMaxRetries, + client.getRetryFilter().getMaxRetries()); + Assert.assertEquals("Retry interval is not equal to expect value", + newIntervalMs, client.getRetryFilter().getRetryIntervalMs()); + } + + @Test + public void testCheckRetryCount() throws Exception { + int newMaxRetries = 1; + long newIntervalMs = 10; + client.customizeRetrySettings(newMaxRetries, newIntervalMs); + try { + // this call should block a while + TimelinePutResponse response = client.putEntities(generateEntity()); + } catch (ClientHandlerException ce) { + Assert.assertTrue(ce.getCause() instanceof ConnectException); + // we would expect this exception here, check if the client has retried + Assert.assertTrue("Retry filter didn't perform any retries! ", + client.getRetryFilter().retried()); + } + } + private static ClientResponse mockEntityClientResponse( TimelineClientImpl client, ClientResponse.Status status, boolean hasError, boolean hasRuntimeError) {