diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java index 653a741..285ffca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java @@ -56,7 +56,7 @@ private static String getAppsTableColumnDefs( .append(", 'mRender': renderHadoopDate }"); if (isResourceManager) { sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }"); - progressIndex = "[18]"; + progressIndex = "[20]"; } else if (isFairSchedulerPage) { sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }"); progressIndex = "[16]"; @@ -103,4 +103,4 @@ public static String resourceRequestsTableInit() { .toString(); } -} \ No newline at end of file +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java index 27469a8..b52a9ef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java @@ -30,8 +30,10 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; @Public @Evolving @@ -63,8 +65,10 @@ protected int priority; private long allocatedCpuVcores; private long allocatedMemoryMB; + private long allocatedGpus; private long reservedCpuVcores; private long reservedMemoryMB; + private long reservedGpus; protected boolean unmanagedApplication; private String appNodeLabelExpression; private String amNodeLabelExpression; @@ -110,6 +114,16 @@ public AppInfo(ApplicationReport app) { reservedCpuVcores = usageReport.getReservedResources(). getVirtualCores(); reservedMemoryMB = usageReport.getReservedResources().getMemorySize(); + Integer gpuIndex = ResourceUtils.getResourceTypeIndex() + .get(ResourceInformation.GPU_URI); + allocatedGpus = -1; + reservedGpus = -1; + if (gpuIndex != null) { + allocatedGpus = usageReport.getUsedResources() + .getResourceValue(ResourceInformation.GPU_URI); + reservedGpus = usageReport.getReservedResources() + .getResourceValue(ResourceInformation.GPU_URI); + } } aggregateResourceAllocation = usageReport.getMemorySeconds() + " MB-seconds, " + usageReport.getVcoreSeconds() @@ -175,6 +189,10 @@ public long getAllocatedMemoryMB() { return allocatedMemoryMB; } + public long getAllocatedGpus() { + return allocatedGpus; + } + public long getReservedCpuVcores() { return reservedCpuVcores; } @@ -183,6 +201,10 @@ public long getReservedMemoryMB() { return reservedMemoryMB; } + public long getReservedGpus() { + return reservedGpus; + } + public float getProgress() { return progress; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index ba6f764..c81883d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -19,14 +19,15 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; -import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo; import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; @@ -62,35 +63,34 @@ protected void render(Block html) { DIV div = html.div().$class("metrics"); - long usedMemoryBytes = 0; - long totalMemoryBytes = 0; - long reservedMemoryBytes = 0; - long usedVCores = 0; - long totalVCores = 0; - long reservedVCores = 0; + Resource usedResources; + Resource totalResources; + Resource reservedResources; + int allocatedContainers; if (clusterMetrics.getCrossPartitionMetricsAvailable()) { - ResourceInfo usedAllPartitions = - clusterMetrics.getTotalUsedResourcesAcrossPartition(); - ResourceInfo totalAllPartitions = - clusterMetrics.getTotalClusterResourcesAcrossPartition(); - ResourceInfo reservedAllPartitions = - clusterMetrics.getTotalReservedResourcesAcrossPartition(); - usedMemoryBytes = usedAllPartitions.getMemorySize() * BYTES_IN_MB; - totalMemoryBytes = totalAllPartitions.getMemorySize() * BYTES_IN_MB; - reservedMemoryBytes = reservedAllPartitions.getMemorySize() * BYTES_IN_MB; - usedVCores = usedAllPartitions.getvCores(); - totalVCores = totalAllPartitions.getvCores(); - reservedVCores = reservedAllPartitions.getvCores(); + allocatedContainers = + clusterMetrics.getTotalAllocatedContainersAcrossPartition(); + usedResources = + clusterMetrics.getTotalUsedResourcesAcrossPartition().getResource(); + totalResources = + clusterMetrics.getTotalClusterResourcesAcrossPartition() + .getResource(); + reservedResources = + clusterMetrics.getTotalReservedResourcesAcrossPartition() + .getResource(); // getTotalUsedResourcesAcrossPartition includes reserved resources. - usedMemoryBytes -= reservedMemoryBytes; - usedVCores -= reservedVCores; + Resources.subtractFrom(usedResources, reservedResources); } else { - usedMemoryBytes = clusterMetrics.getAllocatedMB() * BYTES_IN_MB; - totalMemoryBytes = clusterMetrics.getTotalMB() * BYTES_IN_MB; - reservedMemoryBytes = clusterMetrics.getReservedMB() * BYTES_IN_MB; - usedVCores = clusterMetrics.getAllocatedVirtualCores(); - totalVCores = clusterMetrics.getTotalVirtualCores(); - reservedVCores = clusterMetrics.getReservedVirtualCores(); + allocatedContainers = clusterMetrics.getContainersAllocated(); + usedResources = Resource.newInstance( + clusterMetrics.getAllocatedMB() * BYTES_IN_MB, + (int) clusterMetrics.getAllocatedVirtualCores()); + totalResources = Resource.newInstance( + clusterMetrics.getTotalMB() * BYTES_IN_MB, + (int) clusterMetrics.getTotalVirtualCores()); + reservedResources = Resource.newInstance( + clusterMetrics.getReservedMB() * BYTES_IN_MB, + (int) clusterMetrics.getReservedVirtualCores()); } div.h3("Cluster Metrics"). @@ -102,12 +102,9 @@ protected void render(Block html) { th().$class("ui-state-default")._("Apps Running")._(). th().$class("ui-state-default")._("Apps Completed")._(). th().$class("ui-state-default")._("Containers Running")._(). - th().$class("ui-state-default")._("Memory Used")._(). - th().$class("ui-state-default")._("Memory Total")._(). - th().$class("ui-state-default")._("Memory Reserved")._(). - th().$class("ui-state-default")._("VCores Used")._(). - th().$class("ui-state-default")._("VCores Total")._(). - th().$class("ui-state-default")._("VCores Reserved")._(). + th().$class("ui-state-default")._("Used Resources")._(). + th().$class("ui-state-default")._("Total Resources")._(). + th().$class("ui-state-default")._("Reserved Resources")._(). _(). _(). tbody().$class("ui-widget-content"). @@ -121,14 +118,10 @@ protected void render(Block html) { clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled() ) ). - td(String.valueOf( - clusterMetrics.getTotalAllocatedContainersAcrossPartition())). - td(StringUtils.byteDesc(usedMemoryBytes)). - td(StringUtils.byteDesc(totalMemoryBytes)). - td(StringUtils.byteDesc(reservedMemoryBytes)). - td(String.valueOf(usedVCores)). - td(String.valueOf(totalVCores)). - td(String.valueOf(reservedVCores)). + td(String.valueOf(allocatedContainers)). + td(usedResources.toString()). + td(totalResources.toString()). + td(reservedResources.toString()). _(). _()._(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index e080f59..f9fb7dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -22,6 +22,7 @@ import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; @@ -85,13 +87,17 @@ protected void render(Block html) { .th(".mem", "Mem Used") .th(".mem", "Mem Avail") .th(".vcores", "VCores Used") - .th(".vcores", "VCores Avail"); + .th(".vcores", "VCores Avail") + .th(".gpus", "GPUs Used") + .th(".gpus", "GPUs Avail"); } else { trbody.th(".containers", "Running Containers (G)") .th(".mem", "Mem Used (G)") .th(".mem", "Mem Avail (G)") .th(".vcores", "VCores Used (G)") .th(".vcores", "VCores Avail (G)") + .th(".gpus", "GPUs Used (G)") + .th(".gpus", "GPUs Avail (G)") .th(".containers", "Running Containers (O)") .th(".mem", "Mem Used (O)") .th(".vcores", "VCores Used (O)") @@ -163,6 +169,16 @@ protected void render(Block html) { nodeTableData.append("\",\"").append(httpAddress).append("\",").append("\""); } + Integer gpuIndex = ResourceUtils.getResourceTypeIndex() + .get(ResourceInformation.GPU_URI); + long usedGPUs = 0; + long availableGPUs = 0; + if (gpuIndex != null) { + usedGPUs = info.getUsedResource().getResource() + .getResourceValue(ResourceInformation.GPU_URI); + availableGPUs = info.getAvailableResource().getResource() + .getResourceValue(ResourceInformation.GPU_URI); + } nodeTableData.append("
") .append(Times.format(info.getLastHealthUpdate())).append("\",\"") @@ -176,6 +192,10 @@ protected void render(Block html) { .append("\",\"").append(String.valueOf(info.getUsedVirtualCores())) .append("\",\"") .append(String.valueOf(info.getAvailableVirtualCores())) + .append("\",\"") + .append(String.valueOf(usedGPUs)) + .append("\",\"") + .append(String.valueOf(availableGPUs)) .append("\",\""); // If opportunistic containers are enabled, add extra fields. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java index e0baf10..31e0b72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java @@ -69,8 +69,10 @@ protected void renderData(Block html) { .th(".runningcontainer", "Running Containers") .th(".allocatedCpu", "Allocated CPU VCores") .th(".allocatedMemory", "Allocated Memory MB") + .th(".allocatedGpu", "Allocated GPUs") .th(".reservedCpu", "Reserved CPU VCores") .th(".reservedMemory", "Reserved Memory MB") + .th(".reservedGpu", "Reserved GPUs") .th(".queuePercentage", "% of Queue") .th(".clusterPercentage", "% of Cluster") .th(".progress", "Progress") @@ -104,6 +106,7 @@ protected void renderData(Block html) { String blacklistedNodesCount = "N/A"; RMApp rmApp = rm.getRMContext().getRMApps() .get(appAttemptId.getApplicationId()); + boolean isAppInCompletedState = false; if (rmApp != null) { RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId); Set nodes = @@ -111,6 +114,7 @@ protected void renderData(Block html) { if (nodes != null) { blacklistedNodesCount = String.valueOf(nodes.size()); } + isAppInCompletedState = rmApp.isAppInCompletedStates(); } String percent = StringUtils.format("%.1f", app.getProgress()); appsTableData @@ -152,12 +156,18 @@ protected void renderData(Block html) { .append(app.getAllocatedMemoryMB() == -1 ? "N/A" : String.valueOf(app.getAllocatedMemoryMB())) .append("\",\"") + .append((isAppInCompletedState && app.getAllocatedGpus() <= 0) + ? UNAVAILABLE : String.valueOf(app.getAllocatedGpus())) + .append("\",\"") .append(app.getReservedCpuVcores() == -1 ? "N/A" : String .valueOf(app.getReservedCpuVcores())) .append("\",\"") .append(app.getReservedMemoryMB() == -1 ? "N/A" : String.valueOf(app.getReservedMemoryMB())) .append("\",\"") + .append((isAppInCompletedState && app.getReservedGpus() <= 0) + ? UNAVAILABLE : String.valueOf(app.getReservedGpus())) + .append("\",\"") .append(queuePercent) .append("\",\"") .append(clusterPercent) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index cc97674..3a1ec1b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -48,8 +48,8 @@ // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. - final int numberOfThInMetricsTable = 23; - final int numberOfActualTableHeaders = 13; + final int numberOfThInMetricsTable = 20; + final int numberOfActualTableHeaders = 15; private final int numberOfThForOpportunisticContainers = 4; private Injector injector;