Details
Description
I can't run a YARN application: the job gets stuck every time I execute the command 'hadoop jar xxx.....'.
The Myriad Web UI shows that the status of the task keeps cycling: it changes from staging to pending, then back to staging, then to pending again, and so on.
There are some screenshots in the attachments below.
Please help me. Thank you very much.
yarn-mesos-resourcemanager-s18.log:
2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8032: starting 2017-07-20 17:36:46,955 INFO org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor value: "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" lost with exit status: 9 2017-07-20 17:36:46,969 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: TASK_FAILED 2017-07-20 17:36:46,970 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned to active state 2017-07-20 17:36:46,971 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked as pending failed task with id value: "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898"
myriad-config-default.yml:
mesosMaster: 192.168.130.18:5050 checkpoint: false frameworkFailoverTimeout: 43200000 frameworkName: MyriadAlpha frameworkRole: "yarn" frameworkUser: mesos # User the Node Manager runs as, required if nodeManagerURI set, otherwise defaults to the user # running the resource manager. frameworkSuperUser: root # To be deprecated, currently permissions need set by a superuser due to Mesos-1790. Must be # root or have passwordless sudo. Required if nodeManagerURI set, ignored otherwise. #nativeLibrary: /usr/local/lib/libmesos.so nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so zkServers: 192.168.130.18:2181 zkTimeout: 20000 restApiPort: 8192 #servedConfigPath: dist/config.tgz servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz #servedBinaryPath: dist/hadoop-2.6.0.tgz servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz profiles: zero: # NMs launched with this profile dynamically obtain cpu/mem from Mesos cpu: 0 mem: 0 small: cpu: 2 mem: 2048 medium: cpu: 4 mem: 4096 large: cpu: 10 mem: 12288 nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero profile. medium: 1 # <profile_name : instances> rebalancer: false haEnabled: false nodemanager: jvmMaxMemoryMB: 1024 cpus: 0.2 cgroups: false executor: jvmMaxMemoryMB: 256 path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar #path: file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar #The following should be used for a remotely distributed URI, hdfs assumed but other URI types valid. 
#nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz #configUri: http://s18:8192/api/artifacts/config.tgz #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz yarnEnvironment: YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 #HADOOP_CONF_DIR=config #HADOOP_TMP_DIR=$MESOS_SANDBOX #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes necessary #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox directory #JAVA_LIBRARY_PATH: /opt/mycompany/lib #mesosAuthenticationPrincipal: #mesosAuthenticationSecretFilename:
yarn-site.xml:
<configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.resourcemanager.hostname</name> <value>s18</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle,myriad_executor</value> <!-- If using MapR distro, please use the following value: <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> --> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.aux-services.myriad_executor.class</name> <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> </property> <property> <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> <value>2000</value> </property> <property> <name>yarn.am.liveness-monitor.expiry-interval-ms</name> <value>10000</value> </property> <property> <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> <value>1000</value> </property> <!-- (more) Site-specific YARN configuration properties --> <property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>${nodemanager.resource.cpu-vcores}</value> </property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>${nodemanager.resource.memory-mb}</value> </property> <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> <property> <name>yarn.nodemanager.pmem-check-enabled</name> <value>false</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <!-- Dynamic Port Assignment enablement by Mesos --> <property> <name>yarn.nodemanager.address</name> <value>${myriad.yarn.nodemanager.address}</value> </property> <property> <name>yarn.nodemanager.webapp.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> 
<name>yarn.nodemanager.webapp.https.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.localizer.address</name> <value>${myriad.yarn.nodemanager.localizer.address}</value> </property> <!-- Myriad Scheduler configuration --> <property> <name>yarn.resourcemanager.scheduler.class</name> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> </property> <!-- Needed for Fine Grain Scaling --> <property> <name>yarn.scheduler.minimum-allocation-vcores</name> <value>0</value> </property> <property> <name>yarn.scheduler.minimum-allocation-mb</name> <value>0</value> </property> </configuration>