diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml index 5c4156b..cd85495 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml @@ -112,6 +112,7 @@ yarn_protos.proto yarn_service_protos.proto applicationmaster_protocol.proto + distributed_scheduler_protocol.proto applicationclient_protocol.proto containermanagement_protocol.proto server/yarn_server_resourcemanager_service_protos.proto diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/DistributedSchedulerProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/DistributedSchedulerProtocol.java new file mode 100644 index 0000000..f47bf73 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/DistributedSchedulerProtocol.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.io.retry.Idempotent; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.exceptions.YarnException; + +import java.io.IOException; + +public interface DistributedSchedulerProtocol extends ApplicationMasterProtocol { + + @Public + @Unstable + @Idempotent + DistSchedRegisterResponse registerApplicationMasterForDistributedScheduling( + RegisterApplicationMasterRequest request) + throws YarnException, IOException; + + @Public + @Unstable + @Idempotent + DistSchedAllocateResponse allocateForDistributedScheduling( + AllocateRequest request) throws YarnException, IOException; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/DistSchedAllocateResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/DistSchedAllocateResponse.java new file mode 100644 index 0000000..c31c764 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/DistSchedAllocateResponse.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.util.Records; + +import java.util.List; + +@Public +@Unstable +public abstract class DistSchedAllocateResponse { + + @Public + @Unstable + public static DistSchedAllocateResponse newInstance(AllocateResponse + allResp) { + DistSchedAllocateResponse response = + Records.newRecord(DistSchedAllocateResponse.class); + response.setAllocateResponse(allResp); + return response; + } + + @Public + @Unstable + public abstract void setAllocateResponse(AllocateResponse response); + + @Public + @Unstable + public abstract AllocateResponse getAllocateResponse(); + + @Public + @Unstable + public abstract void setNodesForScheduling(List nodesForScheduling); + + @Public + @Unstable + public abstract List getNodesForScheduling(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/DistSchedRegisterResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/DistSchedRegisterResponse.java new file mode 100644 index 0000000..8a79c4c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/DistSchedRegisterResponse.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.util.Records; + +import java.util.List; + +@Public +@Unstable +public abstract class DistSchedRegisterResponse { + + @Public + @Unstable + public static DistSchedRegisterResponse newInstance + (RegisterApplicationMasterResponse regAMResp) { + DistSchedRegisterResponse response = + Records.newRecord(DistSchedRegisterResponse.class); + response.setRegisterResponse(regAMResp); + return response; + } + + @Public + @Unstable + public abstract void setRegisterResponse( + RegisterApplicationMasterResponse resp); + + @Public + @Unstable + public abstract RegisterApplicationMasterResponse getRegisterResponse(); + + @Public + @Unstable + public abstract void setMinAllocatableCapabilty(Resource minResource); + + @Public + @Unstable + public abstract Resource getMinAllocatableCapabilty(); + + @Public + @Unstable + public abstract void setMaxAllocatableCapabilty(Resource maxResource); + + @Public + @Unstable + public abstract Resource getMaxAllocatableCapabilty(); + + @Public + @Unstable + public abstract void setContainerTokenExpiryInterval(int interval); + + @Public + @Unstable + public abstract int getContainerTokenExpiryInterval(); + + @Public + @Unstable + public abstract void setContainerIdStart(long containerIdStart); + + @Public + @Unstable + public abstract long getContainerIdStart(); + + @Public + @Unstable + public abstract void setNodesForScheduling(List nodesForScheduling); + + @Public + @Unstable + public abstract List getNodesForScheduling(); + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index f493fd3..6ed3fab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -282,6 +282,38 @@ private static void addDeprecatedKeys() { /** ACL used in case none is found. Allows nothing. */ public static final String DEFAULT_YARN_APP_ACL = " "; + /** Is Distributed Scheduling Enabled */ + public static String DIST_SCHEDULING_ENABLED = + YARN_PREFIX + "distributed-scheduling.enabled"; + public static final boolean DIST_SCHEDULING_ENABLED_DEFAULT = false; + + /** Mininum allocatable container memory for Distributed Scheduling */ + public static String DIST_SCHEDULING_MIN_MEMORY = + YARN_PREFIX + "distributed-scheduling.min-memory"; + public static final int DIST_SCHEDULING_MIN_MEMORY_DEFAULT = 512; + + /** Mininum allocatable container vcores for Distributed Scheduling */ + public static String DIST_SCHEDULING_MIN_VCORES = + YARN_PREFIX + "distributed-scheduling.min-vcores"; + public static final int DIST_SCHEDULING_MIN_VCORES_DEFAULT = 1; + + /** Maximum allocatable container memory for Distributed Scheduling */ + public static String DIST_SCHEDULING_MAX_MEMORY = + YARN_PREFIX + "distributed-scheduling.max-memory"; + public static final int DIST_SCHEDULING_MAX_MEMORY_DEFAULT = 2048; + + /** Maximum allocatable container vcores for Distributed Scheduling */ + public static String DIST_SCHEDULING_MAX_VCORES = + YARN_PREFIX + "distributed-scheduling.max-vcores"; + public static final int DIST_SCHEDULING_MAX_VCORES_DEFAULT = 4; + + /** Container token expiry for container allocated via + * Distributed Scheduling */ + public static String DIST_SCHEDULING_CONTAINER_TOKEN_EXPIRY_MS = + YARN_PREFIX + "distributed-scheduling.container-token-expiry"; + public static final int DIST_SCHEDULING_CONTAINER_TOKEN_EXPIRY_MS_DEFAULT = + 600000; + /** * Enable/disable intermediate-data encryption at YARN level. For now, this * only is used by the FileSystemRMStateStore to setup right file-system diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/distributed_scheduler_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/distributed_scheduler_protocol.proto new file mode 100644 index 0000000..fb66d21 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/distributed_scheduler_protocol.proto @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * These .proto interfaces are public and stable. + * Please see http://wiki.apache.org/hadoop/Compatibility + * for what changes are allowed for a *stable* .proto interface. + */ + +option java_package = "org.apache.hadoop.yarn.proto"; +option java_outer_classname = "DistributedSchedulerProtocol"; +option java_generic_services = true; +option java_generate_equals_and_hash = true; +package hadoop.yarn; + +import "yarn_service_protos.proto"; + + +service DistributedSchedulerProtocolService { + rpc registerApplicationMasterForDistributedScheduling (RegisterApplicationMasterRequestProto) returns (DistSchedRegisterResponseProto); + rpc allocateForDistributedScheduling (AllocateRequestProto) returns (DistSchedAllocateResponseProto); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 8924eba..2f9eec2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -50,6 +50,15 @@ message RegisterApplicationMasterResponseProto { repeated SchedulerResourceTypes scheduler_resource_types = 7; } +message DistSchedRegisterResponseProto { + optional RegisterApplicationMasterResponseProto register_response = 1; + optional ResourceProto max_alloc_capability = 2; + optional ResourceProto min_alloc_capability = 3; + optional int32 container_token_expiry_interval = 4; + optional int64 container_id_start = 5; + repeated NodeIdProto nodes_for_scheduling = 6; +} + message FinishApplicationMasterRequestProto { optional string diagnostics = 1; optional string tracking_url = 2; @@ -91,6 +100,11 @@ message AllocateResponseProto { optional PriorityProto application_priority = 13; } +message DistSchedAllocateResponseProto { + optional AllocateResponseProto allocate_response = 1; + repeated NodeIdProto nodes_for_scheduling = 2; +} + enum SchedulerResourceTypes { MEMORY = 0; CPU = 1; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/DistributedSchedulerProtocolPB.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/DistributedSchedulerProtocolPB.java new file mode 100644 index 0000000..b750a60 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/DistributedSchedulerProtocolPB.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.ipc.ProtocolInfo; +import org.apache.hadoop.yarn.proto.ApplicationMasterProtocol.ApplicationMasterProtocolService; +import org.apache.hadoop.yarn.proto.DistributedSchedulerProtocol.DistributedSchedulerProtocolService; + +@Private +@Unstable +@ProtocolInfo(protocolName = "org.apache.hadoop.yarn.api.DistributedSchedulerProtocolPB", + protocolVersion = 1) +public interface DistributedSchedulerProtocolPB extends + DistributedSchedulerProtocolService.BlockingInterface, + ApplicationMasterProtocolService.BlockingInterface { +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/DistributedSchedulerProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/DistributedSchedulerProtocolPBClientImpl.java new file mode 100644 index 0000000..688f99a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/DistributedSchedulerProtocolPBClientImpl.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.impl.pb.client; + +import com.google.protobuf.ServiceException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocol; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocolPB; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl; + + +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .DistSchedAllocateResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .DistSchedRegisterResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .FinishApplicationMasterRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .FinishApplicationMasterResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .RegisterApplicationMasterRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .RegisterApplicationMasterResponsePBImpl; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.ipc.RPCUtil; +import org.apache.hadoop.yarn.proto.YarnServiceProtos; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; + +public class DistributedSchedulerProtocolPBClientImpl implements + DistributedSchedulerProtocol, Closeable { + + private DistributedSchedulerProtocolPB proxy; + + public DistributedSchedulerProtocolPBClientImpl(long clientVersion, + InetSocketAddress addr, + Configuration conf) throws IOException { + RPC.setProtocolEngine(conf, DistributedSchedulerProtocolPB.class, + ProtobufRpcEngine.class); + proxy = RPC.getProxy(DistributedSchedulerProtocolPB.class, clientVersion, + addr, conf); + } + + @Override + public void close() { + if (this.proxy != null) { + RPC.stopProxy(this.proxy); + } + } + + @Override + public DistSchedRegisterResponse + registerApplicationMasterForDistributedScheduling + (RegisterApplicationMasterRequest request) throws YarnException, + IOException { + YarnServiceProtos.RegisterApplicationMasterRequestProto requestProto = + ((RegisterApplicationMasterRequestPBImpl) request).getProto(); + try { + return new DistSchedRegisterResponsePBImpl( + proxy.registerApplicationMasterForDistributedScheduling( + null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override + public DistSchedAllocateResponse allocateForDistributedScheduling + (AllocateRequest request) throws YarnException, IOException { + YarnServiceProtos.AllocateRequestProto requestProto = + ((AllocateRequestPBImpl) request).getProto(); + try { + return new DistSchedAllocateResponsePBImpl( + proxy.allocateForDistributedScheduling(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override + public RegisterApplicationMasterResponse registerApplicationMaster + (RegisterApplicationMasterRequest request) throws YarnException, + IOException { + YarnServiceProtos.RegisterApplicationMasterRequestProto requestProto = + ((RegisterApplicationMasterRequestPBImpl) request).getProto(); + try { + return new RegisterApplicationMasterResponsePBImpl( + proxy.registerApplicationMaster(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override + public FinishApplicationMasterResponse finishApplicationMaster + (FinishApplicationMasterRequest request) throws YarnException, + IOException { + YarnServiceProtos.FinishApplicationMasterRequestProto requestProto = + ((FinishApplicationMasterRequestPBImpl) request).getProto(); + try { + return new FinishApplicationMasterResponsePBImpl( + proxy.finishApplicationMaster(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override + public AllocateResponse allocate(AllocateRequest request) throws + YarnException, IOException { + YarnServiceProtos.AllocateRequestProto requestProto = + ((AllocateRequestPBImpl) request).getProto(); + try { + return new AllocateResponsePBImpl(proxy.allocate(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/DistributedSchedulerProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/DistributedSchedulerProtocolPBServiceImpl.java new file mode 100644 index 0000000..44cc54f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/DistributedSchedulerProtocolPBServiceImpl.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.impl.pb.service; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocol; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocolPB; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords + .FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.DistSchedAllocateResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.DistSchedRegisterResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .FinishApplicationMasterRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .FinishApplicationMasterResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RegisterApplicationMasterRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RegisterApplicationMasterResponsePBImpl; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.DistSchedAllocateResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.DistSchedRegisterResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterRequestProto; + +import java.io.IOException; + +public class DistributedSchedulerProtocolPBServiceImpl implements + DistributedSchedulerProtocolPB { + + private DistributedSchedulerProtocol real; + + public DistributedSchedulerProtocolPBServiceImpl( + DistributedSchedulerProtocol impl) { + this.real = impl; + } + + @Override + public DistSchedRegisterResponseProto + registerApplicationMasterForDistributedScheduling(RpcController controller, + RegisterApplicationMasterRequestProto proto) throws + ServiceException { + RegisterApplicationMasterRequestPBImpl request = new + RegisterApplicationMasterRequestPBImpl(proto); + try { + DistSchedRegisterResponse response = + real.registerApplicationMasterForDistributedScheduling(request); + return ((DistSchedRegisterResponsePBImpl) response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override + public DistSchedAllocateResponseProto + allocateForDistributedScheduling(RpcController controller, + AllocateRequestProto proto) throws ServiceException { + AllocateRequestPBImpl request = new AllocateRequestPBImpl(proto); + try { + DistSchedAllocateResponse response = real + .allocateForDistributedScheduling(request); + return ((DistSchedAllocateResponsePBImpl) response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override + public YarnServiceProtos.AllocateResponseProto allocate(RpcController arg0, + AllocateRequestProto proto) throws ServiceException { + AllocateRequestPBImpl request = new AllocateRequestPBImpl(proto); + try { + AllocateResponse response = real.allocate(request); + return ((AllocateResponsePBImpl) response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override + public YarnServiceProtos.FinishApplicationMasterResponseProto + finishApplicationMaster( + RpcController arg0, YarnServiceProtos + .FinishApplicationMasterRequestProto proto) + throws ServiceException { + FinishApplicationMasterRequestPBImpl request = new + FinishApplicationMasterRequestPBImpl(proto); + try { + FinishApplicationMasterResponse response = real.finishApplicationMaster + (request); + return ((FinishApplicationMasterResponsePBImpl) response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override + public YarnServiceProtos.RegisterApplicationMasterResponseProto + registerApplicationMaster( + RpcController arg0, RegisterApplicationMasterRequestProto proto) + throws ServiceException { + RegisterApplicationMasterRequestPBImpl request = new + RegisterApplicationMasterRequestPBImpl(proto); + try { + RegisterApplicationMasterResponse response = real + .registerApplicationMaster(request); + return ((RegisterApplicationMasterResponsePBImpl) response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/DistSchedAllocateResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/DistSchedAllocateResponsePBImpl.java new file mode 100644 index 0000000..f97bcef --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/DistSchedAllocateResponsePBImpl.java @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; +import org.apache.hadoop.yarn.proto.YarnProtos; +import org.apache.hadoop.yarn.proto.YarnServiceProtos; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.DistSchedAllocateResponseProto; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class DistSchedAllocateResponsePBImpl extends DistSchedAllocateResponse { + + DistSchedAllocateResponseProto proto = + DistSchedAllocateResponseProto.getDefaultInstance(); + DistSchedAllocateResponseProto.Builder builder = null; + boolean viaProto = false; + + private AllocateResponse allocateResponse; + private List nodesForScheduling; + + public DistSchedAllocateResponsePBImpl() { + builder = DistSchedAllocateResponseProto.newBuilder(); + } + + public DistSchedAllocateResponsePBImpl(DistSchedAllocateResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public DistSchedAllocateResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = DistSchedAllocateResponseProto.newBuilder(proto); + } + viaProto = false; + } + + private synchronized void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private synchronized void mergeLocalToBuilder() { + if (this.nodesForScheduling != null) { + builder.clearNodesForScheduling(); + Iterable iterable = + getNodeIdProtoIterable(this.nodesForScheduling); + builder.addAllNodesForScheduling(iterable); + } + if (this.allocateResponse != null) { + builder.setAllocateResponse( + ((AllocateResponsePBImpl)this.allocateResponse).getProto()); + } + } + @Override + public void setAllocateResponse(AllocateResponse response) { + maybeInitBuilder(); + if(allocateResponse == null) { + builder.clearAllocateResponse(); + } + this.allocateResponse = response; + } + + @Override + public AllocateResponse getAllocateResponse() { + if (this.allocateResponse != null) { + return this.allocateResponse; + } + + YarnServiceProtos.DistSchedAllocateResponseProtoOrBuilder p = + viaProto ? proto : builder; + if (!p.hasAllocateResponse()) { + return null; + } + + this.allocateResponse = + new AllocateResponsePBImpl(p.getAllocateResponse()); + return this.allocateResponse; + } + + @Override + public void setNodesForScheduling(List nodesForScheduling) { + maybeInitBuilder(); + if (nodesForScheduling == null || nodesForScheduling.isEmpty()) { + if (this.nodesForScheduling != null) { + this.nodesForScheduling.clear(); + } + builder.clearNodesForScheduling(); + return; + } + this.nodesForScheduling = new ArrayList<>(); + this.nodesForScheduling.addAll(nodesForScheduling); + } + + @Override + public List getNodesForScheduling() { + if (nodesForScheduling != null) { + return nodesForScheduling; + } + initLocalNodesForSchedulingList(); + return nodesForScheduling; + } + + private synchronized void initLocalNodesForSchedulingList() { + YarnServiceProtos.DistSchedAllocateResponseProtoOrBuilder p = + viaProto ? proto : builder; + List list = p.getNodesForSchedulingList(); + nodesForScheduling = new ArrayList<>(); + if (list != null) { + for (YarnProtos.NodeIdProto t : list) { + nodesForScheduling.add(ProtoUtils.convertFromProtoFormat(t)); + } + } + } + private synchronized Iterable getNodeIdProtoIterable( + final List nodeList) { + maybeInitBuilder(); + return new Iterable() { + @Override + public synchronized Iterator iterator() { + return new Iterator() { + + Iterator iter = nodeList.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public YarnProtos.NodeIdProto next() { + return ProtoUtils.convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/DistSchedRegisterResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/DistSchedRegisterResponsePBImpl.java new file mode 100644 index 0000000..e1a7726 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/DistSchedRegisterResponsePBImpl.java @@ -0,0 +1,274 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import com.google.protobuf.TextFormat; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; + +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; +import org.apache.hadoop.yarn.proto.YarnProtos; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.DistSchedRegisterResponseProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.DistSchedRegisterResponseProto; + + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class DistSchedRegisterResponsePBImpl extends DistSchedRegisterResponse { + + DistSchedRegisterResponseProto proto = + DistSchedRegisterResponseProto.getDefaultInstance(); + DistSchedRegisterResponseProto.Builder builder = null; + boolean viaProto = false; + + private Resource maxAllocatableCapability; + private Resource minAllocatableCapability; + private List nodesForScheduling; + private RegisterApplicationMasterResponse registerApplicationMasterResponse; + + public DistSchedRegisterResponsePBImpl() { + builder = DistSchedRegisterResponseProto.newBuilder(); + } + + public DistSchedRegisterResponsePBImpl(DistSchedRegisterResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public DistSchedRegisterResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = DistSchedRegisterResponseProto.newBuilder(proto); + } + viaProto = false; + } + + private synchronized void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private synchronized void mergeLocalToBuilder() { + if (this.nodesForScheduling != null) { + builder.clearNodesForScheduling(); + Iterable iterable = + getNodeIdProtoIterable(this.nodesForScheduling); + builder.addAllNodesForScheduling(iterable); + } + if (this.maxAllocatableCapability != null) { + builder.setMaxAllocCapability( + ProtoUtils.convertToProtoFormat(this.maxAllocatableCapability)); + } + if (this.minAllocatableCapability != null) { + builder.setMaxAllocCapability( + ProtoUtils.convertToProtoFormat(this.minAllocatableCapability)); + } + if (this.registerApplicationMasterResponse != null) { + builder.setRegisterResponse( + ((RegisterApplicationMasterResponsePBImpl) + this.registerApplicationMasterResponse).getProto()); + } + } + + @Override + public void setRegisterResponse(RegisterApplicationMasterResponse resp) { + maybeInitBuilder(); + if(registerApplicationMasterResponse == null) { + builder.clearRegisterResponse(); + } + this.registerApplicationMasterResponse = resp; + } + + @Override + public RegisterApplicationMasterResponse getRegisterResponse() { + if (this.registerApplicationMasterResponse != null) { + return this.registerApplicationMasterResponse; + } + + DistSchedRegisterResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasRegisterResponse()) { + return null; + } + + this.registerApplicationMasterResponse = + new RegisterApplicationMasterResponsePBImpl(p.getRegisterResponse()); + return this.registerApplicationMasterResponse; + } + + @Override + public void setMaxAllocatableCapabilty(Resource maxResource) { + maybeInitBuilder(); + if(maxAllocatableCapability == null) { + builder.clearMaxAllocCapability(); + } + this.maxAllocatableCapability = maxResource; + } + + @Override + public Resource getMaxAllocatableCapabilty() { + if (this.maxAllocatableCapability != null) { + return this.maxAllocatableCapability; + } + + DistSchedRegisterResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasMaxAllocCapability()) { + return null; + } + + this.maxAllocatableCapability = + ProtoUtils.convertFromProtoFormat(p.getMaxAllocCapability()); + return this.maxAllocatableCapability; + } + + @Override + public void setMinAllocatableCapabilty(Resource minResource) { + maybeInitBuilder(); + if(minAllocatableCapability == null) { + builder.clearMinAllocCapability(); + } + this.minAllocatableCapability = minResource; + } + + @Override + public Resource getMinAllocatableCapabilty() { + if (this.minAllocatableCapability != null) { + return this.minAllocatableCapability; + } + + DistSchedRegisterResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasMinAllocCapability()) { + return null; + } + + this.minAllocatableCapability = + ProtoUtils.convertFromProtoFormat(p.getMinAllocCapability()); + return this.minAllocatableCapability; + } + + @Override + public void setContainerTokenExpiryInterval(int interval) { + maybeInitBuilder(); + builder.setContainerTokenExpiryInterval(interval); + } + + @Override + public int getContainerTokenExpiryInterval() { + DistSchedRegisterResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasContainerTokenExpiryInterval()) { + return 0; + } + return p.getContainerTokenExpiryInterval(); + } + + @Override + public void setContainerIdStart(long containerIdStart) { + maybeInitBuilder(); + builder.setContainerIdStart(containerIdStart); + } + + @Override + public long getContainerIdStart() { + DistSchedRegisterResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasContainerIdStart()) { + return 0; + } + return p.getContainerIdStart(); + } + + + @Override + public void setNodesForScheduling(List nodesForScheduling) { + maybeInitBuilder(); + if (nodesForScheduling == null || nodesForScheduling.isEmpty()) { + if (this.nodesForScheduling != null) { + this.nodesForScheduling.clear(); + } + builder.clearNodesForScheduling(); + return; + } + this.nodesForScheduling = new ArrayList<>(); + this.nodesForScheduling.addAll(nodesForScheduling); + } + + @Override + public List getNodesForScheduling() { + if (nodesForScheduling != null) { + return nodesForScheduling; + } + initLocalNodesForSchedulingList(); + return nodesForScheduling; + } + + private synchronized void initLocalNodesForSchedulingList() { + DistSchedRegisterResponseProtoOrBuilder p = viaProto ? proto : builder; + List list = p.getNodesForSchedulingList(); + nodesForScheduling = new ArrayList<>(); + if (list != null) { + for (YarnProtos.NodeIdProto t : list) { + nodesForScheduling.add(ProtoUtils.convertFromProtoFormat(t)); + } + } + } + private synchronized Iterable getNodeIdProtoIterable( + final List nodeList) { + maybeInitBuilder(); + return new Iterable() { + @Override + public synchronized Iterator iterator() { + return new Iterator() { + + Iterator iter = nodeList.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public YarnProtos.NodeIdProto next() { + return ProtoUtils.convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java index 2213cef..3f95f10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java @@ -37,8 +37,10 @@ import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.ReservationRequestInterpreter; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.proto.YarnProtos; import org.apache.hadoop.yarn.proto.YarnProtos.AMCommandProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAccessTypeProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationResourceUsageReportProto; @@ -294,4 +296,15 @@ public static ExecutionTypeProto convertToProtoFormat(ExecutionType e) { public static ExecutionType convertFromProtoFormat(ExecutionTypeProto e) { return ExecutionType.valueOf(e.name()); } + + /* + * Resource + */ + public static synchronized YarnProtos.ResourceProto convertToProtoFormat(Resource r) { + return ((ResourcePBImpl) r).getProto(); + } + + public static Resource convertFromProtoFormat(YarnProtos.ResourceProto resource) { + return new ResourcePBImpl(resource); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java index b29263e..d41cfa2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocol; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; @@ -95,7 +96,8 @@ protected InetSocketAddress getRMAddress(YarnConfiguration conf, YarnConfiguration.RM_ADMIN_ADDRESS, YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS, YarnConfiguration.DEFAULT_RM_ADMIN_PORT); - } else if (protocol == ApplicationMasterProtocol.class) { + } else if (protocol == ApplicationMasterProtocol.class + || protocol == DistributedSchedulerProtocol.class ) { setAMRMTokenService(conf); return conf.getSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 9c2d1fb..baae949 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; +import org.apache.hadoop.yarn.server.nodemanager.scheduler.QueueableContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -87,4 +88,8 @@ ConcurrentLinkedQueue getLogAggregationStatusForApps(); + + boolean isDistributedSchedulingEnabled(); + + QueueableContainerAllocator getContainerAllocator(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 04e383f..ff432d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -69,6 +69,7 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; +import org.apache.hadoop.yarn.server.nodemanager.scheduler.QueueableContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer; @@ -187,9 +188,9 @@ protected DeletionService createDeletionService(ContainerExecutor exec) { protected NMContext createNMContext( NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, - NMStateStoreService stateStore) { + NMStateStoreService stateStore, boolean isDistSchedulerEnabled) { return new NMContext(containerTokenSecretManager, nmTokenSecretManager, - dirsHandler, aclsManager, stateStore); + dirsHandler, aclsManager, stateStore, isDistSchedulerEnabled); } protected void doSecureLogin() throws IOException { @@ -310,8 +311,12 @@ protected void serviceInit(Configuration conf) throws Exception { getNodeHealthScriptRunner(conf), dirsHandler); addService(nodeHealthChecker); + boolean isDistSchedulingEnabled = + conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DIST_SCHEDULING_ENABLED_DEFAULT); + this.context = createNMContext(containerTokenSecretManager, - nmTokenSecretManager, nmStore); + nmTokenSecretManager, nmStore, isDistSchedulingEnabled); nodeLabelsProvider = createNodeLabelsProvider(conf); @@ -340,6 +345,10 @@ protected void serviceInit(Configuration conf) throws Exception { addService(webServer); ((NMContext) context).setWebServer(webServer); + ((NMContext) context).setQueueableContainerAllocator( + new QueueableContainerAllocator(nodeStatusUpdater, context, + webServer.getPort())); + dispatcher.register(ContainerManagerEventType.class, containerManager); dispatcher.register(NodeManagerEventType.class, this); addService(dispatcher); @@ -461,11 +470,14 @@ public void run() { private boolean isDecommissioned = false; private final ConcurrentLinkedQueue logAggregationReportForApps; + private final boolean isDistSchedulingEnabled; + + private QueueableContainerAllocator containerAllocator; public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager, - NMStateStoreService stateStore) { + NMStateStoreService stateStore, boolean isDistSchedulingEnabled) { this.containerTokenSecretManager = containerTokenSecretManager; this.nmTokenSecretManager = nmTokenSecretManager; this.dirsHandler = dirsHandler; @@ -476,6 +488,7 @@ public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, this.stateStore = stateStore; this.logAggregationReportForApps = new ConcurrentLinkedQueue< LogAggregationReport>(); + this.isDistSchedulingEnabled = isDistSchedulingEnabled; } /** @@ -588,6 +601,21 @@ public void setSystemCrendentialsForApps( getLogAggregationStatusForApps() { return this.logAggregationReportForApps; } + + @Override + public boolean isDistributedSchedulingEnabled() { + return isDistSchedulingEnabled; + } + + public void setQueueableContainerAllocator( + QueueableContainerAllocator containerAllocator) { + this.containerAllocator = containerAllocator; + } + + @Override + public QueueableContainerAllocator getContainerAllocator() { + return containerAllocator; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java index bd6538c..c564409 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java @@ -63,6 +63,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; + +import org.apache.hadoop.yarn.server.nodemanager.scheduler.LocalScheduler; import org.apache.hadoop.yarn.server.security.MasterKeyData; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.utils.YarnServerSecurityUtils; @@ -464,6 +466,13 @@ protected RequestInterceptor createRequestInterceptorChain() { interceptorClassNames.add(item.trim()); } + // Make sure LocalScheduler is present at the end + // of the chain.. and remove the DefaultRequestInterceptor + if (this.nmContext.isDistributedSchedulingEnabled()) { + interceptorClassNames.remove(DefaultRequestInterceptor.class.getName()); + interceptorClassNames.add(LocalScheduler.class.getName()); + } + return interceptorClassNames; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/LocalScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/LocalScheduler.java new file mode 100644 index 0000000..f1f0dd8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/LocalScheduler.java @@ -0,0 +1,391 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.scheduler; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords + .FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords + .FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords + .RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords + .RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.NMToken; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.client.ClientRMProxy; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.server.nodemanager.amrmproxy + .AMRMProxyApplicationContext; +import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AbstractRequestInterceptor; + + + +import org.apache.hadoop.yarn.server.nodemanager.security + .NMTokenSecretManagerInNM; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +public final class LocalScheduler extends AbstractRequestInterceptor { + + class PartitionedResourceRequests { + private List guaranteed = new ArrayList<>(); + private List queueable = new ArrayList<>(); + public List getGuaranteed() { + return guaranteed; + } + public List getQueueable() { + return queueable; + } + } + + public static class PriorityComparator implements Comparator { + @Override + public int compare(Priority o1, Priority o2) { + return o1.getPriority() - o2.getPriority(); + } + } + + class DistSchedulerParams { + int maxAllocMb; + int minAllocMb; + int maxVCores; + int minVCores; + int containerTokenExpiryInterval; + } + + private static final Logger LOG = LoggerFactory + .getLogger(LocalScheduler.class); + + private int numAllocatedContainers; + private DistSchedulerParams appParams = new DistSchedulerParams(); + private final QueueableContainerAllocator.ContainerIdCounter containerIdCounter = + new QueueableContainerAllocator.ContainerIdCounter(); + private Map nodeList = new HashMap<>(); + private Map nodeTokens = new HashMap<>(); + final Set blacklist = new TreeSet<>(); + final Set priorities = new TreeSet<>(new PriorityComparator()); + final Map>> requests = + new HashMap<>(); + + private DistributedSchedulerProtocol rmClient; + private UserGroupInformation user = null; + + private ApplicationAttemptId applicationAttemptId; + private QueueableContainerAllocator containerAllocator; + private NMTokenSecretManagerInNM nmSecretManager; + private String appSubmitter; + + public void init(AMRMProxyApplicationContext appContext) { + super.init(appContext); + initLocal(appContext.getApplicationAttemptId(), + appContext.getNMCotext().getContainerAllocator(), + appContext.getNMCotext().getNMTokenSecretManager(), + appContext.getUser()); + try { + user = + UserGroupInformation.createProxyUser(applicationAttemptId.toString(), + UserGroupInformation.getCurrentUser()); + user.addToken(appContext.getAMRMToken()); + final Configuration conf = this.getConf(); + + rmClient = + user.doAs(new PrivilegedExceptionAction() { + @Override + public DistributedSchedulerProtocol run() throws Exception { + return ClientRMProxy.createRMProxy(conf, + DistributedSchedulerProtocol.class); + } + }); + } catch (IOException e) { + String message = + "Error while creating of RM app master service proxy for attemptId:" + + applicationAttemptId.toString(); + if (user != null) { + message += ", user: " + user; + } + + LOG.info(message); + throw new YarnRuntimeException(message, e); + } catch (Exception e) { + throw new YarnRuntimeException(e); + } + } + + @VisibleForTesting + void initLocal(ApplicationAttemptId applicationAttemptId, + QueueableContainerAllocator containerAllocator, + NMTokenSecretManagerInNM nmSecretManager, String appSubmitter) { + this.applicationAttemptId = applicationAttemptId; + this.containerAllocator = containerAllocator; + this.nmSecretManager = nmSecretManager; + this.appSubmitter = appSubmitter; + } + + @VisibleForTesting + void setRmClient(DistributedSchedulerProtocol client) { + this.rmClient = client; + } + + @Override + public RegisterApplicationMasterResponse registerApplicationMaster + (RegisterApplicationMasterRequest request) throws YarnException, + IOException { + LOG.info("Forwarding registration request to the" + + "Distributed Scheduler Service on YARN RM"); + DistSchedRegisterResponse dsResp = + rmClient.registerApplicationMasterForDistributedScheduling(request); + updateParameters(dsResp); + return dsResp.getRegisterResponse(); + } + + @Override + public FinishApplicationMasterResponse finishApplicationMaster + (FinishApplicationMasterRequest request) throws YarnException, + IOException { + return getNextInterceptor().finishApplicationMaster(request); + } + + @Override + public AllocateResponse allocate(AllocateRequest request) throws + YarnException, IOException { + PartitionedResourceRequests partitionedAsks = partitionAskList(request + .getAskList()); + request.setAskList(partitionedAsks.getGuaranteed()); + List releasedContainers = request.getReleaseList(); + int numReleasedContainers = releasedContainers.size(); + if (numReleasedContainers > 0) { + LOG.info("AttemptID: " + applicationAttemptId + " released: " + + numReleasedContainers); + numAllocatedContainers -= numReleasedContainers; + } + + // Also, update black list + ResourceBlacklistRequest rbr = request.getResourceBlacklistRequest(); + blacklist.removeAll(rbr.getBlacklistRemovals()); + blacklist.addAll(rbr.getBlacklistAdditions()); + + updateResourceAsk(partitionedAsks.getQueueable()); + List allocatedContainers = new ArrayList<>(); + for (Priority priority : priorities) { + for (Map reqMap : + requests.get(priority).values()) { + Map> allocated = + containerAllocator.allocate(this.appParams, containerIdCounter, + reqMap.values(), blacklist, applicationAttemptId, nodeList, + appSubmitter); + for (Map.Entry> e : allocated.entrySet()) { + updateAllocation(e.getKey(), e.getValue()); + allocatedContainers.addAll(e.getValue()); + } + } + } + + DistSchedAllocateResponse dsResp = + rmClient.allocateForDistributedScheduling(request); + + setNodeList(dsResp.getNodesForScheduling()); + List nmTokens = dsResp.getAllocateResponse().getNMTokens(); + for (NMToken nmToken : nmTokens) { + nodeTokens.put(nmToken.getNodeId(), nmToken); + } + int numCompletedContainers = 0; + List completedContainers = + dsResp.getAllocateResponse().getCompletedContainersStatuses(); + + // Only account for queueable containers + for (ContainerStatus cs : completedContainers) { + if (cs.getExecutionType() == ExecutionType.QUEUEABLE) + numCompletedContainers++; + } + numAllocatedContainers -= numCompletedContainers; + + updateResponseWithNMTokens( + dsResp.getAllocateResponse(), nmTokens, allocatedContainers); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Number of queueable containers currently allocated by application: " + + numAllocatedContainers); + } + return dsResp.getAllocateResponse(); + } + + private void updateResponseWithNMTokens(AllocateResponse response, + List nmTokens, List allocatedContainers) { + List newTokens = new ArrayList<>(); + if (allocatedContainers.size() > 0) { + response.getAllocatedContainers().addAll(allocatedContainers); + for (Container alloc : allocatedContainers) { + if (!nodeTokens.containsKey(alloc.getNodeId())) { + newTokens.add(nmSecretManager.generateNMToken(appSubmitter, alloc)); + } + } + List retTokens = new ArrayList<>(nmTokens); + retTokens.addAll(newTokens); + response.setNMTokens(retTokens); + } + } + + private PartitionedResourceRequests partitionAskList(List + askList) { + PartitionedResourceRequests partitionedRequests = + new PartitionedResourceRequests(); + for (ResourceRequest rr : askList) { + if (rr.getExecutionType() == ExecutionType.QUEUEABLE) { + partitionedRequests.getQueueable().add(rr); + } else { + partitionedRequests.getGuaranteed().add(rr); + } + } + return partitionedRequests; + } + + private void updateParameters( + DistSchedRegisterResponse registerResponse) { + appParams.minAllocMb = registerResponse.getMinAllocatableCapabilty() + .getMemory(); + appParams.maxAllocMb = registerResponse.getMaxAllocatableCapabilty() + .getMemory(); + appParams.minVCores = registerResponse.getMinAllocatableCapabilty() + .getVirtualCores(); + appParams.maxVCores = registerResponse.getMaxAllocatableCapabilty() + .getVirtualCores(); + appParams.containerTokenExpiryInterval = registerResponse + .getContainerTokenExpiryInterval(); + + containerIdCounter + .resetContainerIdCounter(registerResponse.getContainerIdStart()); + setNodeList(registerResponse.getNodesForScheduling()); + } + + public void updateResourceAsk(List requests) { + // Update resource requests + for (ResourceRequest request : requests) { + // Handling locality for queueable tokens is optional; we rely on + // "anyAsk" to drive allocations + Priority priority = request.getPriority(); + String resourceName = request.getResourceName(); + + if (!ResourceRequest.isAnyLocation(request.getResourceName())) { + continue; + } + + if (request.getNumContainers() == 0) { + continue; + } + + Map> asks = + this.requests.get(priority); + if (asks == null) { + asks = new HashMap<>(); + this.requests.put(priority, asks); + this.priorities.add(priority); + } + + Map reqMap = asks.get(resourceName); + if (reqMap == null) { + reqMap = new HashMap<>(); + asks.put(resourceName, reqMap); + } + + ResourceRequest resourceRequest = reqMap.get(request.getCapability()); + if (resourceRequest == null) { + resourceRequest = request; + reqMap.put(request.getCapability(), request); + } else { + resourceRequest.setNumContainers( + resourceRequest.getNumContainers() + request.getNumContainers()); + } + if (ResourceRequest.isAnyLocation(request.getResourceName())) { + LOG.info("# of requests in ANY (at priority = " + priority + ", " + + "with capability = " + request.getCapability() + " ) : " + + resourceRequest.getNumContainers()); + } + } + } + + public void updateAllocation(Resource capability, + List allocatedContainers) { + numAllocatedContainers += allocatedContainers.size(); + for (Container c : allocatedContainers) { + Map> asks = this + .requests.get(c.getPriority()); + + if (asks == null) + continue; + + // Host specific accounting + removeFromReqMap(capability, asks.get(c.getNodeId().getHost())); + + // any ask accounting + removeFromReqMap(capability, asks.get(ResourceRequest.ANY)); + } + } + + private void removeFromReqMap(Resource capability, Map rrMap) { + if (rrMap != null) { + ResourceRequest rr = rrMap.get(capability); + if (rr != null) { + rr.setNumContainers(rr.getNumContainers() - 1); + if (rr.getNumContainers() == 0) + rrMap.remove(capability); + } + } + } + + private void setNodeList(List nodeList) { + this.nodeList.clear(); + addToNodeList(nodeList); + } + + private void addToNodeList(List nodes) { + for (NodeId n : nodes) { + this.nodeList.put(n.getHost(), n); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/QueueableContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/QueueableContainerAllocator.java new file mode 100644 index 0000000..d2ad3b3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/QueueableContainerAllocator.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.scheduler; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.yarn.api.records.*; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; +import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.api.ContainerType; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; +import org.apache.hadoop.yarn.server.nodemanager.scheduler.LocalScheduler.DistSchedulerParams; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; + +import java.net.InetSocketAddress; +import java.util.*; +import java.util.concurrent.atomic.AtomicLong; + +public class QueueableContainerAllocator { + + private static final Log LOG = + LogFactory.getLog(QueueableContainerAllocator.class); + + static class ContainerIdCounter { + final AtomicLong containerIdCounter = new AtomicLong(1); + + void resetContainerIdCounter(long containerIdStart) { + this.containerIdCounter.set(containerIdStart % 2 == 0 ? + containerIdStart + 1 : containerIdStart); + } + + long generateContainerId() { + return this.containerIdCounter.getAndAdd(2); + } + } + + private final NodeStatusUpdater nodeStatusUpdater; + private final Context context; + private int webpagePort; + + public QueueableContainerAllocator(NodeStatusUpdater nodeStatusUpdater, + Context context, int webpagePort) { + this.nodeStatusUpdater = nodeStatusUpdater; + this.context = context; + this.webpagePort = webpagePort; + } + + public Map> allocate(DistSchedulerParams appParams, + ContainerIdCounter idCounter, Collection resourceAsks, + Set blacklist, ApplicationAttemptId appAttId, + Map allNodes, String userName) throws YarnException { + Map> containers = new HashMap<>(); + Set nodesAllocated = new HashSet<>(); + List anyAsks = new ArrayList<>(resourceAsks); + for (ResourceRequest anyAsk : anyAsks) { + allocateQueueableContainers(appParams, idCounter, blacklist, appAttId, + allNodes, userName, containers, nodesAllocated, anyAsk); + } + if (resourceAsks.size() > 0) { + LOG.info("Queueable allocation requested for: " + resourceAsks.size() + + " containers; allocated = " + containers.size()); + } + return containers; + } + + private void allocateQueueableContainers(DistSchedulerParams appParams, + ContainerIdCounter idCounter, Set blacklist, + ApplicationAttemptId id, Map allNodes, String userName, + Map> containers, Set nodesAllocated, + ResourceRequest anyAsk) throws YarnException { + int toAllocate = anyAsk.getNumContainers() + - containers.get(anyAsk.getCapability()).size(); + + List topKNodesLeft = new ArrayList<>(); + for (String s : allNodes.keySet()) { + // Bias away from whatever we have already allocated and respect blacklist + if (nodesAllocated.contains(s) || blacklist.contains(s)) + continue; + topKNodesLeft.add(s); + } + int numAllocated = 0; + int nextNodeToAllocate = 0; + for (int numCont = 0; numCont < toAllocate; numCont++) { + String topNode = topKNodesLeft.get(nextNodeToAllocate); + nextNodeToAllocate++; + nextNodeToAllocate %= topKNodesLeft.size(); + NodeId nodeId = allNodes.get(topNode); + Container container = buildContainer(appParams, idCounter, anyAsk, id, + userName, nodeId); + List cList = containers.get(anyAsk.getCapability()); + if (cList == null) { + cList = new ArrayList<>(); + containers.put(anyAsk.getCapability(), cList); + } + cList.add(container); + numAllocated++; + LOG.info("Allocated " + numAllocated + " queueable containers."); + } + } + + private Container buildContainer(DistSchedulerParams appParams, + ContainerIdCounter idCounter, ResourceRequest rr, ApplicationAttemptId id, + String userName, NodeId nodeId) throws YarnException { + ContainerId cId = + ContainerId.newContainerId(id, idCounter.generateContainerId()); + Resource capability = fixCapability(appParams, rr.getCapability()); + long currTime = System.currentTimeMillis(); + ContainerTokenIdentifier containerTokenIdentifier = + new ContainerTokenIdentifier( + cId, nodeId.getHost(), userName, capability, + currTime + appParams.containerTokenExpiryInterval, + context.getContainerTokenSecretManager().getCurrentKey().getKeyId(), + nodeStatusUpdater.getRMIdentifier(), rr.getPriority(), currTime, + null, CommonNodeLabelsManager.NO_LABEL, ContainerType.TASK, + ExecutionType.QUEUEABLE); + byte[] pwd = + context.getContainerTokenSecretManager().createPassword( + containerTokenIdentifier); + Token containerToken = newContainerToken(nodeId, pwd, + containerTokenIdentifier); + Container container = BuilderUtils.newContainer( + cId, nodeId, nodeId.getHost() + ":" + webpagePort, + capability, rr.getPriority(), containerToken); + return container; + } + + private Resource fixCapability(DistSchedulerParams appParams, + Resource capability) { + return Resource.newInstance( + fixMemory(appParams, capability.getMemory()), + fixVCores(appParams, capability.getVirtualCores())); + } + + // #!# Round to the next multiple of container dimension + private int fixMemory(DistSchedulerParams appParams, int memory) { + if (memory > appParams.maxAllocMb) { + return appParams.maxAllocMb; + } + return appParams.minAllocMb * + ((int) Math.ceil((float) memory / appParams.minAllocMb)); + } + + // #!# Round to the select the right number of virtual core + private int fixVCores(DistSchedulerParams appParams, int vCores) { + if (vCores > appParams.maxVCores) { + return appParams.maxVCores; + } + return vCores < appParams.minVCores ? appParams.minVCores : vCores; + } + + public static Token newContainerToken(NodeId nodeId, byte[] password, + ContainerTokenIdentifier tokenIdentifier) { + // RPC layer client expects ip:port as service for tokens + InetSocketAddress addr = NetUtils.createSocketAddrForHost(nodeId.getHost(), + nodeId.getPort()); + // NOTE: use SecurityUtil.setTokenService if this becomes a "real" token + Token containerToken = Token.newInstance(tokenIdentifier.getBytes(), + ContainerTokenIdentifier.KIND.toString(), password, SecurityUtil + .buildTokenService(addr).toString()); + return containerToken; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java index f6169e7..86cce35 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java @@ -29,7 +29,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; @@ -50,7 +53,7 @@ private final Map oldMasterKeys; private final Map> appToAppAttemptMap; private final NMStateStoreService stateStore; - private NodeId nodeId; + private NodeId nodeId; public NMTokenSecretManagerInNM() { this(new NMNullStateStoreService()); @@ -276,4 +279,23 @@ private void removeAppAttemptKey(ApplicationAttemptId attempt) { LOG.error("Unable to remove master key for application " + attempt, e); } } + + /** + * Used by the Distributed Scheduler framework to generate NMTokens + * @param applicationSubmitter + * @param container + * @return NMToken + */ + public NMToken generateNMToken( + String applicationSubmitter, Container container) { + this.readLock.lock(); + try { + Token token = + createNMToken(container.getId().getApplicationAttemptId(), + container.getNodeId(), applicationSubmitter); + return NMToken.newInstance(container.getNodeId(), token); + } finally { + this.readLock.unlock(); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index 3dc62bc..6885804 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -80,7 +80,7 @@ public void testSuccessfulContainerLaunch() throws InterruptedException, Context context = new NMContext(new NMContainerTokenSecretManager(conf), new NMTokenSecretManagerInNM(), null, null, - new NMNullStateStoreService()) { + new NMNullStateStoreService(), false) { @Override public int getHttpPort() { return 1234; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 90804b8..d0d437c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -1565,7 +1565,7 @@ protected NodeStatusUpdater createNodeStatusUpdater(Context context, protected NMContext createNMContext( NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, - NMStateStoreService store) { + NMStateStoreService store, boolean isDistributedSchedulingEnabled) { return new MyNMContext(containerTokenSecretManager, nmTokenSecretManager); } @@ -1800,7 +1800,7 @@ public MyNMContext( NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager) { super(containerTokenSecretManager, nmTokenSecretManager, null, null, - new NMNullStateStoreService()); + new NMNullStateStoreService(), false); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 9bc23f6..823860b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -66,6 +66,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; +import org.apache.hadoop.yarn.server.nodemanager.scheduler + .QueueableContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -678,5 +680,14 @@ public NodeResourceMonitor getNodeResourceMonitor() { return null; } + @Override + public boolean isDistributedSchedulingEnabled() { + return false; + } + + @Override + public QueueableContainerAllocator getContainerAllocator() { + return null; + } } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index c902fd5..d070bbe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -110,7 +110,7 @@ public BaseContainerManagerTest() throws UnsupportedFileSystemException { protected Configuration conf = new YarnConfiguration(); protected Context context = new NMContext(new NMContainerTokenSecretManager( conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), new NMNullStateStoreService()) { + new ApplicationACLsManager(conf), new NMNullStateStoreService(), false) { public int getHttpPort() { return HTTP_PORT; }; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 2e014de..dfb7a1b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -471,7 +471,7 @@ private NMContext createContext(Configuration conf, NMStateStoreService stateStore) { NMContext context = new NMContext(new NMContainerTokenSecretManager( conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), stateStore){ + new ApplicationACLsManager(conf), stateStore, false){ public int getHttpPort() { return HTTP_PORT; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 0abae2b..6f3adb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -109,7 +109,7 @@ protected Context distContext = new NMContext(new NMContainerTokenSecretManager( conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), new NMNullStateStoreService()) { + new ApplicationACLsManager(conf), new NMNullStateStoreService(), false) { public int getHttpPort() { return HTTP_PORT; }; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java index 9e08b7f..c768df1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java @@ -81,7 +81,8 @@ public void testMinimumPerDirectoryFileLimit() { NMContext nmContext = new NMContext(new NMContainerTokenSecretManager(conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), new NMNullStateStoreService()); + new ApplicationACLsManager(conf), new NMNullStateStoreService(), + false); ResourceLocalizationService service = new ResourceLocalizationService(null, null, null, null, nmContext); try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java index 64d3d68..4c594e1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java @@ -186,7 +186,7 @@ public void setup() throws IOException { conf.set(YarnConfiguration.NM_LOG_DIRS, logDir); nmContext = new NMContext(new NMContainerTokenSecretManager( conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), new NMNullStateStoreService()); + new ApplicationACLsManager(conf), new NMNullStateStoreService(), false); } @After @@ -2365,7 +2365,7 @@ private ResourceLocalizationService createSpyService( NMContext nmContext = new NMContext(new NMContainerTokenSecretManager(conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), stateStore); + new ApplicationACLsManager(conf), stateStore, false); ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher, exec, delService, dirsHandler, nmContext); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestLocalScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestLocalScheduler.java new file mode 100644 index 0000000..2e3ac9a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestLocalScheduler.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.scheduler; + +public class TestLocalScheduler { +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java index 84e42fc..6a72cc0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java @@ -96,7 +96,7 @@ public void testContainerLogDirs() throws IOException, YarnException { healthChecker.init(conf); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, - new ApplicationACLsManager(conf), new NMNullStateStoreService()); + new ApplicationACLsManager(conf), new NMNullStateStoreService(), false); // Add an application and the corresponding containers RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(conf); String user = "nobody"; @@ -136,7 +136,7 @@ public void testContainerLogDirs() throws IOException, YarnException { when(dirsHandlerForFullDisk.getLogDirsForRead()). thenReturn(Arrays.asList(new String[] {absLogDir.getAbsolutePath()})); nmContext = new NodeManager.NMContext(null, null, dirsHandlerForFullDisk, - new ApplicationACLsManager(conf), new NMNullStateStoreService()); + new ApplicationACLsManager(conf), new NMNullStateStoreService(), false); nmContext.getApplications().put(appId, app); container.setState(ContainerState.RUNNING); nmContext.getContainers().put(container1, container); @@ -158,7 +158,7 @@ public void testContainerLogFile() throws IOException, YarnException { LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService(); dirsHandler.init(conf); NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, - new ApplicationACLsManager(conf), new NMNullStateStoreService()); + new ApplicationACLsManager(conf), new NMNullStateStoreService(), false); // Add an application and the corresponding containers String user = "nobody"; long clusterTimeStamp = 1234; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index e1845c7..39e8394 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -87,7 +87,7 @@ private NodeHealthCheckerService createNodeHealthCheckerService(Configuration co private int startNMWebAppServer(String webAddr) { Context nmContext = new NodeManager.NMContext(null, null, null, null, - null); + null, false); ResourceView resourceView = new ResourceView() { @Override public long getVmemAllocatedForContainers() { @@ -150,7 +150,7 @@ public void testNMWebAppWithEphemeralPort() throws IOException { @Test public void testNMWebApp() throws IOException, YarnException { Context nmContext = new NodeManager.NMContext(null, null, null, null, - null); + null, false); ResourceView resourceView = new ResourceView() { @Override public long getVmemAllocatedForContainers() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 1f5590c..2ac0956 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -111,7 +111,7 @@ protected void configureServlets() { healthChecker.init(conf); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, dirsHandler, - aclsManager, null); + aclsManager, null, false); NodeId nodeId = NodeId.newInstance("testhost.foo.com", 8042); ((NodeManager.NMContext)nmContext).setNodeId(nodeId); resourceView = new ResourceView() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java index e274abb..dfbcf06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java @@ -104,7 +104,7 @@ protected void configureServlets() { dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, dirsHandler, - aclsManager, null); + aclsManager, null, false); NodeId nodeId = NodeId.newInstance("testhost.foo.com", 9999); ((NodeManager.NMContext)nmContext).setNodeId(nodeId); resourceView = new ResourceView() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index 0ed56d3..efad825 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -132,7 +132,7 @@ public boolean isPmemCheckEnabled() { dirsHandler = healthChecker.getDiskHandler(); aclsManager = new ApplicationACLsManager(conf); nmContext = new NodeManager.NMContext(null, null, dirsHandler, - aclsManager, null) { + aclsManager, null, false) { public NodeId getNodeId() { return NodeId.newInstance("testhost.foo.com", 8042); }; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index ab94175..6c015a0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -35,6 +35,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.security.SaslRpcServer; import org.apache.hadoop.security.UserGroupInformation; @@ -48,6 +49,9 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; + +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb + .DistSchedRegisterResponsePBImpl; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -86,9 +90,15 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.security + .AMRMTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider; import org.apache.hadoop.yarn.server.security.MasterKeyData; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -104,21 +114,27 @@ private static final Log LOG = LogFactory.getLog(ApplicationMasterService.class); private final AMLivelinessMonitor amLivelinessMonitor; private YarnScheduler rScheduler; - private InetSocketAddress masterServiceAddress; - private Server server; - private final RecordFactory recordFactory = + protected InetSocketAddress masterServiceAddress; + protected Server server; + protected final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private final ConcurrentMap responseMap = new ConcurrentHashMap(); - private final RMContext rmContext; + protected final RMContext rmContext; - public ApplicationMasterService(RMContext rmContext, YarnScheduler scheduler) { + public ApplicationMasterService(String name, RMContext rmContext, + YarnScheduler scheduler) { super(ApplicationMasterService.class.getName()); this.amLivelinessMonitor = rmContext.getAMLivelinessMonitor(); this.rScheduler = scheduler; this.rmContext = rmContext; } + public ApplicationMasterService(RMContext rmContext, + YarnScheduler scheduler) { + this(ApplicationMasterService.class.getName(), rmContext, scheduler); + } + @Override protected void serviceInit(Configuration conf) throws Exception { masterServiceAddress = conf.getSocketAddr( @@ -139,11 +155,8 @@ protected void serviceStart() throws Exception { serverConf.set( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, SaslRpcServer.AuthMethod.TOKEN.toString()); - this.server = - rpc.getServer(ApplicationMasterProtocol.class, this, masterServiceAddress, - serverConf, this.rmContext.getAMRMTokenSecretManager(), - serverConf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT, - YarnConfiguration.DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT)); + this.server = getServer(rpc, serverConf, masterServiceAddress, + this.rmContext.getAMRMTokenSecretManager()); // Enable service authorization? if (conf.getBoolean( @@ -158,7 +171,7 @@ protected void serviceStart() throws Exception { } refreshServiceAcls(conf, RMPolicyProvider.getInstance()); } - + this.server.start(); this.masterServiceAddress = conf.updateConnectAddr(YarnConfiguration.RM_BIND_HOST, @@ -168,6 +181,14 @@ protected void serviceStart() throws Exception { super.serviceStart(); } + protected Server getServer(YarnRPC rpc, Configuration serverConf, + InetSocketAddress addr, AMRMTokenSecretManager secretManager) { + return rpc.getServer(ApplicationMasterProtocol.class, this, addr, + serverConf, secretManager, + serverConf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT, + YarnConfiguration.DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT)); + } + @Private public InetSocketAddress getBindAddress() { return this.masterServiceAddress; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DistributedSchedulingService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DistributedSchedulingService.java new file mode 100644 index 0000000..9035990 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DistributedSchedulingService.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.yarn.api.ApplicationMasterProtocolPB; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocol; +import org.apache.hadoop.yarn.api.impl.pb.service.ApplicationMasterProtocolPBServiceImpl; + + +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.proto.ApplicationMasterProtocol.ApplicationMasterProtocolService; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.security + .AMRMTokenSecretManager; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; + +public class DistributedSchedulingService extends ApplicationMasterService + implements DistributedSchedulerProtocol { + + public DistributedSchedulingService(RMContext rmContext, + YarnScheduler scheduler) { + super(DistributedSchedulingService.class.getName(), rmContext, scheduler); + } + + @Override + public Server getServer(YarnRPC rpc, Configuration serverConf, + InetSocketAddress addr, AMRMTokenSecretManager secretManager) { + Server server = rpc.getServer(DistributedSchedulerProtocol.class, this, + addr, serverConf, secretManager, + serverConf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT, + YarnConfiguration.DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT)); + // To support application running no NMs that DO NOT support + // Dist Scheduling... + ((RPC.Server) server).addProtocol(RPC.RpcKind.RPC_PROTOCOL_BUFFER, + ApplicationMasterProtocolPB.class, + ApplicationMasterProtocolService.newReflectiveBlockingService( + new ApplicationMasterProtocolPBServiceImpl(this))); + return server; + } + + @Override + public RegisterApplicationMasterResponse registerApplicationMaster + (RegisterApplicationMasterRequest request) throws YarnException, + IOException { + return super.registerApplicationMaster(request); + } + + @Override + public FinishApplicationMasterResponse finishApplicationMaster + (FinishApplicationMasterRequest request) throws YarnException, + IOException { + return super.finishApplicationMaster(request); + } + + @Override + public AllocateResponse allocate(AllocateRequest request) throws + YarnException, IOException { + return super.allocate(request); + } + + @Override + public DistSchedRegisterResponse + registerApplicationMasterForDistributedScheduling( + RegisterApplicationMasterRequest request) throws YarnException, + IOException { + RegisterApplicationMasterResponse response = + registerApplicationMaster(request); + DistSchedRegisterResponse dsResp = recordFactory + .newRecordInstance(DistSchedRegisterResponse.class); + dsResp.setRegisterResponse(response); + dsResp.setMinAllocatableCapabilty( + Resource.newInstance( + getConfig().getInt( + YarnConfiguration.DIST_SCHEDULING_MIN_MEMORY, + YarnConfiguration.DIST_SCHEDULING_MIN_MEMORY_DEFAULT), + getConfig().getInt( + YarnConfiguration.DIST_SCHEDULING_MIN_VCORES, + YarnConfiguration.DIST_SCHEDULING_MIN_VCORES_DEFAULT) + ) + ); + dsResp.setMaxAllocatableCapabilty( + Resource.newInstance( + getConfig().getInt( + YarnConfiguration.DIST_SCHEDULING_MAX_MEMORY, + YarnConfiguration.DIST_SCHEDULING_MAX_MEMORY_DEFAULT), + getConfig().getInt( + YarnConfiguration.DIST_SCHEDULING_MAX_VCORES, + YarnConfiguration.DIST_SCHEDULING_MAX_VCORES_DEFAULT) + ) + ); + dsResp.setContainerTokenExpiryInterval( + getConfig().getInt( + YarnConfiguration.DIST_SCHEDULING_CONTAINER_TOKEN_EXPIRY_MS, + YarnConfiguration. + DIST_SCHEDULING_CONTAINER_TOKEN_EXPIRY_MS_DEFAULT)); + dsResp.setContainerIdStart( + this.rmContext.getEpoch() << ResourceManager.EPOCH_BIT_SHIFT); + + // Set nodes to be used for scheduling + // TODO: The actual computation of the list will happen in YARN-4412 + // TODO: Till then, send the complete list + dsResp.setNodesForScheduling( + new ArrayList<>(this.rmContext.getRMNodes().keySet())); + return dsResp; + } + + @Override + public DistSchedAllocateResponse allocateForDistributedScheduling + (AllocateRequest request) throws YarnException, IOException { + AllocateResponse response = allocate(request); + DistSchedAllocateResponse dsResp = recordFactory.newRecordInstance + (DistSchedAllocateResponse.class); + dsResp.setAllocateResponse(response); + dsResp.setNodesForScheduling( + new ArrayList<>(this.rmContext.getRMNodes().keySet())); + return dsResp; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 01a1c8f..4e26257 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -120,6 +120,11 @@ */ public static final int SHUTDOWN_HOOK_PRIORITY = 30; + /** + * Used for generation of various ids. + */ + public static final int EPOCH_BIT_SHIFT = 40; + private static final Log LOG = LogFactory.getLog(ResourceManager.class); private static long clusterTimeStamp = System.currentTimeMillis(); @@ -1116,6 +1121,11 @@ protected ClientRMService createClientRMService() { } protected ApplicationMasterService createApplicationMasterService() { + if (this.rmContext.getYarnConfiguration().getBoolean( + YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DIST_SCHEDULING_ENABLED_DEFAULT)) { + return new DistributedSchedulingService(this.rmContext, scheduler); + } return new ApplicationMasterService(this.rmContext, scheduler); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index c5f8cd1..2564228 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -42,6 +42,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; @@ -63,7 +64,6 @@ final String user; // TODO making containerIdCounter long private final AtomicLong containerIdCounter; - private final int EPOCH_BIT_SHIFT = 40; final Set priorities = new TreeSet( new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator()); @@ -82,7 +82,7 @@ boolean pending = true; // for app metrics private ResourceUsage appResourceUsage; - + public AppSchedulingInfo(ApplicationAttemptId appAttemptId, String user, Queue queue, ActiveUsersManager activeUsersManager, long epoch, ResourceUsage appResourceUsage) { @@ -92,7 +92,8 @@ public AppSchedulingInfo(ApplicationAttemptId appAttemptId, this.queueName = queue.getQueueName(); this.user = user; this.activeUsersManager = activeUsersManager; - this.containerIdCounter = new AtomicLong(epoch << EPOCH_BIT_SHIFT); + this.containerIdCounter = + new AtomicLong(epoch << ResourceManager.EPOCH_BIT_SHIFT); this.appResourceUsage = appResourceUsage; } @@ -126,7 +127,9 @@ private synchronized void clearRequests() { } public long getNewContainerId() { - return this.containerIdCounter.incrementAndGet(); + // For Distributed Scheduling, to differentiate containerIds generated + // by the RM and NM. Those generated by the RM are always even numbers. + return this.containerIdCounter.addAndGet(2); } public boolean hasIncreaseRequest(NodeId nodeId) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 0372cd7..bd1f15c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -91,6 +91,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; + + import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.YarnVersionInfo; import org.apache.log4j.Level; @@ -702,6 +704,21 @@ protected void serviceStop() { @Override protected ApplicationMasterService createApplicationMasterService() { + if (this.rmContext.getYarnConfiguration().getBoolean( + YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DIST_SCHEDULING_ENABLED_DEFAULT)) { + return new DistributedSchedulingService(getRMContext(), scheduler) { + @Override + protected void serviceStart() { + // override to not start rpc handler + } + + @Override + protected void serviceStop() { + // don't do anything + } + }; + } return new ApplicationMasterService(getRMContext(), scheduler) { @Override protected void serviceStart() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDistributedSchedulingService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDistributedSchedulingService.java new file mode 100644 index 0000000..a52e3aa --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestDistributedSchedulingService.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; +import org.apache.hadoop.yarn.api.ApplicationMasterProtocolPB; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocol; +import org.apache.hadoop.yarn.api.DistributedSchedulerProtocolPB; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedAllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.DistSchedRegisterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; + +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; +import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt + .AMLivelinessMonitor; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.Arrays; + +public class TestDistributedSchedulingService { + + // Test if the DistributedSchedulingService can handle both DSProtocol as + // well as AMProtocol clients + @Test + public void testRPCWrapping() throws Exception { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class + .getName()); + YarnRPC rpc = YarnRPC.create(conf); + String bindAddr = "localhost:0"; + InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr); + conf.setSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, addr); + final RecordFactory factory = RecordFactoryProvider.getRecordFactory(null); + final RMContext rmContext = new RMContextImpl() { + @Override + public AMLivelinessMonitor getAMLivelinessMonitor() { + return null; + } + }; + DistributedSchedulingService service = + new DistributedSchedulingService(rmContext, null) { + @Override + public RegisterApplicationMasterResponse registerApplicationMaster + (RegisterApplicationMasterRequest request) throws + YarnException, IOException { + RegisterApplicationMasterResponse resp = factory.newRecordInstance( + RegisterApplicationMasterResponse.class); + // Dummy Entry to Assert that we get this object back + resp.setQueue("dummyQueue"); + return resp; + } + + @Override + public FinishApplicationMasterResponse finishApplicationMaster + (FinishApplicationMasterRequest request) throws YarnException, + IOException { + FinishApplicationMasterResponse resp = factory.newRecordInstance( + FinishApplicationMasterResponse.class); + // Dummy Entry to Assert that we get this object back + resp.setIsUnregistered(false); + return resp; + } + + @Override + public AllocateResponse allocate(AllocateRequest request) throws + YarnException, IOException { + AllocateResponse response = factory.newRecordInstance + (AllocateResponse.class); + response.setNumClusterNodes(12345); + return response; + } + + @Override + public DistSchedRegisterResponse + registerApplicationMasterForDistributedScheduling + (RegisterApplicationMasterRequest request) throws + YarnException, IOException { + DistSchedRegisterResponse resp = factory.newRecordInstance( + DistSchedRegisterResponse.class); + resp.setContainerIdStart(54321l); + return resp; + } + + @Override + public DistSchedAllocateResponse allocateForDistributedScheduling + (AllocateRequest request) throws YarnException, IOException { + DistSchedAllocateResponse resp = + factory.newRecordInstance(DistSchedAllocateResponse.class); + resp.setNodesForScheduling( + Arrays.asList(NodeId.newInstance("h1", 1234))); + return resp; + } + }; + Server server = service.getServer(rpc, conf, addr, null); + server.start(); + + // Verify that the DistrubutedSchedulingService can handle vanilla + // ApplicationMasterProtocol clients + RPC.setProtocolEngine(conf, ApplicationMasterProtocolPB.class, + ProtobufRpcEngine.class); + ApplicationMasterProtocol ampProxy = + (ApplicationMasterProtocol) rpc.getProxy(ApplicationMasterProtocol + .class, NetUtils.getConnectAddress(server), conf); + RegisterApplicationMasterResponse regResp = ampProxy.registerApplicationMaster( + factory.newRecordInstance(RegisterApplicationMasterRequest.class)); + Assert.assertEquals("dummyQueue", regResp.getQueue()); + FinishApplicationMasterResponse finishResp = ampProxy + .finishApplicationMaster(factory.newRecordInstance( + FinishApplicationMasterRequest.class)); + Assert.assertEquals(false, finishResp.getIsUnregistered()); + AllocateResponse allocResp = ampProxy + .allocate(factory.newRecordInstance(AllocateRequest.class)); + Assert.assertEquals(12345, allocResp.getNumClusterNodes()); + + + // Verify that the DistrubutedSchedulingService can handle the + // DistributedSchedulerProtocol clients as well + RPC.setProtocolEngine(conf, DistributedSchedulerProtocolPB.class, + ProtobufRpcEngine.class); + DistributedSchedulerProtocol dsProxy = + (DistributedSchedulerProtocol) rpc.getProxy(DistributedSchedulerProtocol + .class, NetUtils.getConnectAddress(server), conf); + + DistSchedRegisterResponse dsRegResp = + dsProxy.registerApplicationMasterForDistributedScheduling( + factory.newRecordInstance(RegisterApplicationMasterRequest.class)); + Assert.assertEquals(54321l, dsRegResp.getContainerIdStart()); + DistSchedAllocateResponse dsAllocResp = + dsProxy.allocateForDistributedScheduling( + factory.newRecordInstance(AllocateRequest.class)); + Assert.assertEquals( + "h1", dsAllocResp.getNodesForScheduling().get(0).getHost()); + } +}