commit 318b1f98c0acb826f629023d0f73ea67ad6e6d8a Author: Vihang Karajgaonkar Date: Sat May 12 11:12:49 2018 -0700 HIVE-19510 : Add performance metric to find the total time spend in rsync diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java index 2015187f74bd92965f739651939fe7043a1b078a..7ab98f6a6caaa730907d9b9d20278509aad9e8d0 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java @@ -22,8 +22,10 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; @@ -102,6 +104,7 @@ public void execute() throws Throwable { try { int expectedNumHosts = hostExecutors.size(); initalizeHosts(); + resetPerfMetrics(); do { replaceBadHosts(expectedNumHosts); List> results = Lists.newArrayList(); @@ -145,10 +148,21 @@ public void execute() throws Throwable { } } finally { long elapsed = System.currentTimeMillis() - start; + addAggregatePerfMetrics(); logger.info("PERF: exec phase " + TimeUnit.MINUTES.convert(elapsed, TimeUnit.MILLISECONDS) + " minutes"); } } + + public static final String TOTAL_RSYNC_TIME = "TotalRsyncElapsedTime"; + private void addAggregatePerfMetrics() { + long totalRsycTime = 0L; + for (HostExecutor hostExecutor : ImmutableList.copyOf(hostExecutors)) { + totalRsycTime += hostExecutor.getTotalRsyncTimeInMs(); + } + addPerfMetric(TOTAL_RSYNC_TIME, totalRsycTime); + } + private void replaceBadHosts(int expectedNumHosts) throws Exception { Set goodHosts = Sets.newHashSet(); diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java index 3a4fa7f3cec10295c77a1e5b066126d7069d2299..47347ebfd49c3c1deadcfded4a5a5b83883267f6 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java @@ -28,6 +28,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import com.google.common.base.Stopwatch; import org.apache.commons.lang.StringUtils; @@ -70,6 +71,7 @@ private volatile boolean mShutdown; private int numParallelBatchesProcessed = 0; private int numIsolatedBatchesProcessed = 0; + private AtomicLong totalElapsedTimeInRsync = new AtomicLong(0L); HostExecutor(Host host, String privateKey, ListeningExecutorService executor, SSHCommandExecutor sshCommandExecutor, @@ -139,6 +141,10 @@ void shutdownNow() { boolean isShutdown() { return mShutdown; } + + long getTotalRsyncTimeInMs() { + return totalElapsedTimeInRsync.get(); + } /** * Executes parallel test until the parallel work queue is empty. Then * executes the isolated tests on the host. During each phase if a @@ -311,6 +317,7 @@ RSyncResult copyToDroneFromLocal(Drone drone, String localFile, String remoteFil if(result.getException() != null || result.getExitCode() != 0) { throw new SSHExecutionException(result); } + totalElapsedTimeInRsync.getAndAdd(result.getElapsedTimeInMs()); return result; } /** @@ -380,6 +387,7 @@ RSyncResult copyFromDroneToLocal(Drone drone, String localFile, String remoteFil if(result.getException() != null || result.getExitCode() != Constants.EXIT_CODE_SUCCESS) { throw new SSHExecutionException(result); } + totalElapsedTimeInRsync.getAndAdd(result.getElapsedTimeInMs()); return result; } /** diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java index b57320d2ada72c4cdccb59cdbc75feec84f1f183..cf9606ef7aa63b226892c361656c5aa978e67912 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/LocalCommand.java @@ -37,6 +37,7 @@ private final StreamReader streamReader; private Integer exitCode; private final int commandId; + private long elapsedTimeInMs; private final Stopwatch stopwatch = Stopwatch.createUnstarted(); public LocalCommand(Logger logger, OutputPolicy outputPolicy, String command) throws IOException { @@ -58,12 +59,20 @@ public int getExitCode() throws InterruptedException { } } + public long getElapsedTimeInMs() throws InterruptedException { + synchronized (process) { + awaitProcessCompletion(); + return elapsedTimeInMs; + } + } + private void awaitProcessCompletion() throws InterruptedException { synchronized (process) { if (exitCode == null) { exitCode = process.waitFor(); if (stopwatch.isRunning()) { stopwatch.stop(); + this.elapsedTimeInMs = stopwatch.elapsed(TimeUnit.MILLISECONDS); logger.info("Finished LocalCommandId={}. ElapsedTime(ms)={}", commandId, stopwatch.elapsed( TimeUnit.MILLISECONDS)); diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java index 8df5162440cab6bf91e3d6efa259dbfa055c953e..2868ff0e2bf23fc8ca79887621a0465a43bb0216 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PTest.java @@ -26,8 +26,10 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; +import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; @@ -74,6 +76,7 @@ .getLogger(PTest.class); + // dummy patch private final TestConfiguration mConfiguration; private final ListeningExecutorService mExecutor; private final Set mAddedTests; @@ -182,6 +185,14 @@ public int run() { } finally { long elapsedTime = TimeUnit.MINUTES.convert((System.currentTimeMillis() - start), TimeUnit.MILLISECONDS); + Map perfMetrics = phase.getPerfMetrics(); + if (!perfMetrics.isEmpty()) { + mLogger.info("Adding perf metrics for " + phase.getClass().getSimpleName() + " phase"); + for (Entry perfEntry : perfMetrics.entrySet()) { + elapsedTimes.put(phase.getClass().getSimpleName() + "." + perfEntry.getKey(), + TimeUnit.MINUTES.convert(perfEntry.getValue(), TimeUnit.MILLISECONDS)); + } + } elapsedTimes.put(phase.getClass().getSimpleName(), elapsedTime); } } @@ -223,7 +234,7 @@ public int run() { } mLogger.info("Executed " + mExecutedTests.size() + " tests"); for(Map.Entry entry : elapsedTimes.entrySet()) { - mLogger.info(String.format("PERF: Phase %s took %d minutes", entry.getKey(), entry.getValue())); + mLogger.info(String.format("PERF: %s took %d minutes", entry.getKey(), entry.getValue())); } publishJiraComment(error, messages, failedTests, mAddedTests); if(error || !mFailedTests.isEmpty()) { diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java index c049d65e43faff31d72a997ce66ac7e0a10cb5c5..34c66ce5db4a9db87d7f384a5d1c670b4442e817 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java @@ -20,10 +20,14 @@ import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.TreeMap; import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import com.google.common.collect.Maps; import org.apache.hive.ptest.execution.LocalCommand.CollectLogPolicy; import org.apache.hive.ptest.execution.ssh.NonZeroExitCodeException; import org.apache.hive.ptest.execution.ssh.RemoteCommandResult; @@ -44,6 +48,7 @@ private final LocalCommandFactory localCommandFactory; private final ImmutableMap templateDefaults; protected final Logger logger; + private Map perfMetrics; public Phase(List hostExecutors, LocalCommandFactory localCommandFactory, @@ -53,6 +58,7 @@ public Phase(List hostExecutors, this.localCommandFactory = localCommandFactory; this.templateDefaults = templateDefaults; this.logger = logger; + this.perfMetrics = new ConcurrentHashMap<>(); } public abstract void execute() throws Throwable; @@ -186,4 +192,16 @@ protected void execLocally(String command) protected ImmutableMap getTemplateDefaults() { return templateDefaults; } + + public Map getPerfMetrics() { + return ImmutableMap.copyOf(perfMetrics); + } + + public void addPerfMetric(final String metricKey, long value) { + perfMetrics.put(metricKey, Long.valueOf(value)); + } + + public void resetPerfMetrics() { + perfMetrics = new ConcurrentHashMap<>(); + } } diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java index fbb1e7934de06fe50148d1be1b79d6692935a653..cadf2097f9798845c2ec6a5cc954ea7705306e16 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommand.java @@ -22,6 +22,7 @@ private final RSyncCommandExecutor executor; private final String localFile; private final String remoteFile; + private long elapsedTimeInMs; private RSyncCommand.Type type; public RSyncCommand(RSyncCommandExecutor executor, String privateKey, String user, String host, int instance, @@ -35,17 +36,23 @@ public RSyncCommand(RSyncCommandExecutor executor, String privateKey, public RSyncCommand.Type getType() { return type; } + + public void setElapsedTimeInMs(long timeInMs) { + this.elapsedTimeInMs = timeInMs; + } + public String getLocalFile() { return localFile; } public String getRemoteFile() { return remoteFile; } + @Override public RSyncResult call() { executor.execute(this); return new RSyncResult(getUser(), getHost(), getInstance(), getLocalFile(), getRemoteFile(), - getExitCode(), getException(), getOutput()); + getExitCode(), getException(), getOutput(), getElapsedTimeInMs()); } @Override @@ -55,6 +62,10 @@ public String toString() { + getHost() + ", getInstance()=" + getInstance() + "]"; } + public long getElapsedTimeInMs() { + return elapsedTimeInMs; + } + public static enum Type { FROM_LOCAL(), TO_LOCAL(), diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java index cd7bcf9d4d7e0ac9be338edf26035a89acc583e1..af06f20c721dd3a12ed54d0e38e77b58a009f879 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncCommandExecutor.java @@ -93,6 +93,7 @@ public void execute(RSyncCommand command) { } } while (!mShutdown && retry); // an error occurred, re-try command.setExitCode(cmd.getExitCode()); + command.setElapsedTimeInMs(cmd.getElapsedTimeInMs()); } catch (IOException e) { command.setException(e); } catch (InterruptedException e) { diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java index ae6bac866e99ebfa573c46c3ef3118b1efa04841..12a043515eb835b27c6a49cfa93cd2702b7e80a8 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/RSyncResult.java @@ -21,12 +21,14 @@ public class RSyncResult extends AbstractSSHResult { private final String localFile; private final String remoteFile; + private final long elapsedTimeInMs; public RSyncResult(String user, String host, int instance, String localFile, String remoteFile, int exitCode, - Exception exception, String output) { + Exception exception, String output, long elapsedTimeInMs) { super(user, host, instance, exitCode, exception, output); this.localFile = localFile; this.remoteFile = remoteFile; + this.elapsedTimeInMs = elapsedTimeInMs; } public String getLocalFile() { return localFile; @@ -34,6 +36,7 @@ public String getLocalFile() { public String getRemoteFile() { return remoteFile; } + public long getElapsedTimeInMs() { return elapsedTimeInMs; } @Override public String toString() { return "RSyncResult [localFile=" + localFile + ", remoteFile=" diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java index 3906435422241608043ab98b0f5f365e09aef7bd..fd4749eedf1c81ba6ed20ff44d0a928e62f5aa46 100644 --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/MockRSyncCommandExecutor.java @@ -18,10 +18,12 @@ */ package org.apache.hive.ptest.execution; +import java.security.SecureRandom; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; +import java.util.Random; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -68,6 +70,8 @@ public synchronized void execute(RSyncCommand command) { matchCount.incrementAndGet(); command.setExitCode(queue.remove()); } + //simulating dummy rsync delay of 17 msec + command.setElapsedTimeInMs(17L); } public int getMatchCount() { diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java index 24c811e3f10f69d15aa60717044791e8f79f6cda..c32ce106ff44441b7de1c3d922656876f5c78c72 100644 --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestExecutionPhase.java @@ -150,6 +150,33 @@ public void testFailingUnitTest() throws Throwable { Assert.assertEquals(Sets.newHashSet("SomeTest." + QFILENAME + " (batchId=1)"), failedTests); } + @Test + public void testPerfMetrics() throws Throwable { + //when test is successful + setupUnitTest(); + copyTestOutput("SomeTest-success.xml", succeededLogDir, testBatch.getName()); + Phase phase = getPhase(); + phase.execute(); + Assert.assertNotNull("Perf metrics should have been initialized", phase.getPerfMetrics()); + Assert.assertNotNull(ExecutionPhase.TOTAL_RSYNC_TIME + " should have been initialized", + phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME)); + Assert.assertTrue("Total Rsync Elapsed time should have been greater than 0", + phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME) > 0); + + //when test fails + setupUnitTest(); + sshCommandExecutor.putFailure("bash " + LOCAL_DIR + "/" + HOST + "-" + USER + + "-0/scratch/hiveptest-" + testBatch.getBatchId() + "_" + DRIVER + ".sh", 1); + copyTestOutput("SomeTest-failure.xml", failedLogDir, testBatch.getName()); + phase = getPhase(); + phase.execute(); + Assert.assertNotNull("Perf metrics should have been initialized", phase.getPerfMetrics()); + Assert.assertNotNull(ExecutionPhase.TOTAL_RSYNC_TIME + " should have been initialized", + phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME)); + Assert.assertTrue("Total Rsync Elapsed time should have been greater than 0", + phase.getPerfMetrics().get(ExecutionPhase.TOTAL_RSYNC_TIME) > 0); + } + @Test(timeout = 20000) public void testTimedOutUnitTest() throws Throwable { setupUnitTest(3);