diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 282f4cdb0b..b30a727112 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3168,6 +3168,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger", "The class responsible for logging client side performance metrics. \n" + "Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger"), + HIVE_PERF_LOGGER_V2("hive.exec.perf.loggerv2", + "org.apache.hadoop.hive.ql.log.MetricsPerfLogger", + "The class responsible for logging client side performance metrics. \n" + + "Must be a subclass of org.apache.hadoop.hive.ql.log.AbstractCachedPerfLogger"), HIVE_START_CLEANUP_SCRATCHDIR("hive.start.cleanup.scratchdir", false, "To cleanup the Hive scratchdir when starting the Hive Server"), HIVE_SCRATCH_DIR_LOCK("hive.scratchdir.lock", false, diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/AbstractCachedPerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/AbstractCachedPerfLogger.java new file mode 100644 index 0000000000..92ebbf71bf --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/ql/log/AbstractCachedPerfLogger.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.log; + +/** + * If a PerfLogger is thread-safe and can be re-used for each request of a + * {@link PerfTimer}, it should extend this class so that it is cached. + */ +public abstract class AbstractCachedPerfLogger extends AbstractPerfLogger { + +} diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/AbstractPerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/AbstractPerfLogger.java new file mode 100644 index 0000000000..09c1cc1d1b --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/ql/log/AbstractPerfLogger.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+import java.util.Optional;
+
+/**
+ * A logger for recording when Hive actions occur and for timing their
+ * duration.
+ */
+public abstract class AbstractPerfLogger {
+
+  /**
+   * An action has started. The start time provided represents nanoseconds since
+   * some fixed but arbitrary origin time (perhaps in the future, so values may
+   * be negative). The start time provided to this method is meaningful only
+   * when compared to the end time later provided, which is generated by the
+   * same instance of a Java virtual machine as the start time.
+   *
+   * @param sessionId Session ID generating this action (optional)
+   * @param clazz The class the logging refers to
+   * @param action The action within the class to measure
+   * @param extra Arbitrary extra information to include in the log message
+   * @param startTime The time the action started.
+   */
+  public abstract void start(Optional<String> sessionId, Class<?> clazz,
+      String action, Optional<String> extra, long startTime);
+
+  /**
+   * An action has stopped. The stop time provided represents nanoseconds since
+   * some fixed but arbitrary origin time (perhaps in the future, so values may
+   * be negative). The stop time provided to this method is meaningful only
+   * when compared to the start time previously provided, which is generated by
+   * the same instance of a Java virtual machine as the stop time.
+   *
+   * @param clazz The class the logging refers to
+   * @param action The action within the class to measure
+   * @param extra Arbitrary extra information to include in the log message
+   * @param stopTime The time the action stopped.
+   */
+  public abstract void stop(Class<?> clazz, String action, Optional<String> extra,
+      long stopTime);
+
+}
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/LoggingPerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/LoggingPerfLogger.java
new file mode 100644
index 0000000000..faed827bd8
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/LoggingPerfLogger.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A PerfLogger implementation that logs information to SLF4J facilities.
+ */
+public class LoggingPerfLogger extends AbstractCachedPerfLogger {
+
+  protected final Logger log = LoggerFactory.getLogger(LoggingPerfLogger.class);
+  private long startTime;
+  private long stopTime;
+
+  @Override
+  public void start(final Optional<String> sessionId, final Class<?> clazz,
+      final String action, final Optional<String> extra, final long startTime) {
+    this.startTime = startTime;
+    if (log.isDebugEnabled()) {
+      log.debug("<PERFLOG sessionId={} class={} action={} extra={} start={}>",
+          sessionId.orElse(""), clazz.getName(), action, extra.orElse(""),
+          TimeUnit.NANOSECONDS.toMillis(this.startTime));
+    }
+  }
+
+  @Override
+  public void stop(final Class<?> clazz, final String action,
+      final Optional<String> extra, final long stopTime) {
+    this.stopTime = stopTime;
+    if (log.isDebugEnabled()) {
+      log.debug(
+          "</PERFLOG class={} action={} extra={} start={} end={} duration={}>",
+          clazz.getName(), action, extra.orElse(""),
+          TimeUnit.NANOSECONDS.toMillis(this.startTime),
+          TimeUnit.NANOSECONDS.toMillis(this.stopTime),
+          TimeUnit.NANOSECONDS.toMillis(this.stopTime - this.startTime));
+    }
+  }
+
+  public long getStartTime() {
+    return startTime;
+  }
+
+  public long getStopTime() {
+    return stopTime;
+  }
+
+  @Override
+  public String toString() {
+    return "LoggingPerfLogger [startTime=" + startTime + ", stopTime=" + stopTime
+        + "]";
+  }
+
+}
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/MetricsPerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/MetricsPerfLogger.java
new file mode 100644
index 0000000000..c723a80020
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/MetricsPerfLogger.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+import java.util.Optional;
+
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
+import org.apache.hadoop.hive.common.metrics.common.MetricsScope;
+
+/**
+ * A PerfLogger that logs to SLF4J and provides data to the {@link Metrics}
+ * sub-system.
+ */
+public class MetricsPerfLogger extends LoggingPerfLogger {
+
+  private Optional<Metrics> metrics = Optional.empty();
+  private MetricsScope scope = null;
+
+  @Override
+  public void start(final Optional<String> sessionId, final Class<?> clazz,
+      final String action, final Optional<String> extra, final long startTime) {
+    super.start(sessionId, clazz, action, extra, startTime);
+    this.metrics = Optional.ofNullable(MetricsFactory.getInstance());
+    if (this.metrics.isPresent()) {
+      this.scope =
+          this.metrics.get().createScope(MetricsConstant.API_PREFIX + action);
+    }
+  }
+
+  @Override
+  public void stop(final Class<?> clazz, final String action,
+      final Optional<String> extra, final long stopTime) {
+    if (metrics.isPresent()) {
+      this.metrics.get().endScope(this.scope);
+    }
+    super.stop(clazz, action, extra, stopTime);
+  }
+
+  @Override
+  public String toString() {
+    return "MetricsPerfLogger [metrics=" + metrics + ", scope=" + scope
+        + ", toString()=" + super.toString() + "]";
+  }
+}
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
index 2707987f0b..a198865186 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
@@ -36,6 +36,7 @@
  *
  * Can be used to measure and log the time spent by a piece of code.
  */
+@Deprecated
 public class PerfLogger {
   public static final String ACQUIRE_READ_WRITE_LOCKS = "acquireReadWriteLocks";
   public static final String COMPILE = "compile";
@@ -260,4 +261,4 @@ public void cleanupPerfLogMetrics() {
     }
     openScopes.clear();
   }
-}
+}
\ No newline at end of file
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimedAction.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimedAction.java
new file mode 100644
index 0000000000..68151f8ff9
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimedAction.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+/**
+ * An enumeration of all the measured actions.
+ */ +public enum PerfTimedAction { + + ACQUIRE_READ_WRITE_LOCKS("acquireReadWriteLocks"), + COMPILE("compile"), + WAIT_COMPILE("waitCompile"), + PARSE("parse"), + ANALYZE("semanticAnalyze"), + OPTIMIZER("optimizer"), + MATERIALIZED_VIEWS_REGISTRY_REFRESH("MaterializedViewsRegistryRefresh"), + DO_AUTHORIZATION("doAuthorization"), + DRIVER_EXECUTE("Driver.execute"), + INPUT_SUMMARY("getInputSummary"), + INPUT_PATHS("getInputPaths"), + GET_SPLITS("getSplits"), + RUN_TASKS("runTasks"), + SERIALIZE_PLAN("serializePlan"), + DESERIALIZE_PLAN("deserializePlan"), + CLONE_PLAN("clonePlan"), + RENAME_FILE("renameFile"), + REMOVE_TMP_DUP_FILES("removeTempOrDuplicateFiles"), + MOVE_FILE_STATUS("moveSpecifiedFileStatus"), + RENAME_MOVE_FILES("RenameOrMoveFiles"), + CREATE_EMPTY_BUCKETS("createEmptyBuckets"), + RELEASE_LOCKS("releaseLocks"), + PRUNE_LISTING("prune-listing"), + PARTITION_RETRIEVING("partition-retrieving"), + PRE_HOOK("PreHook"), + POST_HOOK("PostHook"), + FAILURE_HOOK("FailureHook"), + TEZ_COMPILER("TezCompiler"), + TEZ_SUBMIT_TO_RUNNING("TezSubmitToRunningDag"), + TEZ_BUILD_DAG("TezBuildDag"), + TEZ_SUBMIT_DAG("TezSubmitDag"), + TEZ_RUN_DAG("TezRunDag"), + TEZ_CREATE_VERTEX("TezCreateVertex"), + TEZ_RUN_VERTEX("TezRunVertex"), + TEZ_INITIALIZE_PROCESSOR("TezInitializeProcessor"), + TEZ_RUN_PROCESSOR("TezRunProcessor"), + TEZ_INIT_OPERATORS("TezInitializeOperators"), + TEZ_GET_SESSION("TezGetSession"), + LOAD_HASHTABLE("LoadHashtable"), + SAVE_TO_RESULTS_CACHE("saveToResultsCache"), + SPARK_SUBMIT_TO_RUNNING("SparkSubmitToRunning"), + SPARK_BUILD_PLAN("SparkBuildPlan"), + SPARK_BUILD_RDD_GRAPH("SparkBuildRDDGraph"), + SPARK_CREATE_EXPLAIN_PLAN("SparkCreateExplainPlan"), + SPARK_SUBMIT_JOB("SparkSubmitJob"), + SPARK_RUN_JOB("SparkRunJob"), + SPARK_CREATE_TRAN("SparkCreateTran"), + SPARK_RUN_STAGE("SparkRunStage"), + SPARK_INIT_OPERATORS("SparkInitializeOperators"), + SPARK_GENERATE_TASK_TREE("SparkGenerateTaskTree"), + SPARK_OPTIMIZE_OPERATOR_TREE("SparkOptimizeOperatorTree"), + SPARK_OPTIMIZE_TASK_TREE("SparkOptimizeTaskTree"), + SPARK_FLUSH_HASHTABLE("SparkFlushHashTable"), + SPARK_DYNAMICALLY_PRUNE_PARTITIONS("SparkDynamicallyPrunePartitions"), + FILE_MOVES("FileMoves"), + LOAD_TABLE("LoadTable"), + LOAD_PARTITION("LoadPartition"), + LOAD_DYNAMIC_PARTITIONS("LoadDynamicPartitions"); + + private final String name; + + private PerfTimedAction(final String name) { + this.name = name; + } + + public String getName() { + return name; + } +} diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimer.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimer.java new file mode 100644 index 0000000000..b14c2369b0 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimer.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+import java.util.Optional;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A tool for timing performance-critical sections of code. This class
+ * implements {@link AutoCloseable} and should be used in a try-with-resources
+ * block:
+ *
+ * <pre>
+ * try (PerfTimer timer = PerfTimerFactory.getPerfTimer(sessionId, conf,
+ *     clazz, action)) {
+ * ...
+ * }
+ * </pre>
+ */
+public class PerfTimer implements AutoCloseable {
+
+  private final Optional<String> sessionId;
+  private final Class<?> clazz;
+  private final PerfTimedAction action;
+  private final AbstractPerfLogger log;
+  private final Optional<String> extra;
+  private final long startTime;
+  private long stopTime = -1L;
+  private boolean isClosed = false;
+
+  public PerfTimer(final Optional<String> sessionId, final Class<?> clazz,
+      final PerfTimedAction action, final Optional<String> extra,
+      final AbstractPerfLogger perfLogger) {
+    this.sessionId = sessionId;
+    this.clazz = clazz;
+    this.action = action;
+    this.log = perfLogger;
+    this.extra = extra;
+    this.startTime = System.nanoTime();
+    this.log.start(sessionId, clazz, action.getName(), extra, this.startTime);
+  }
+
+  public Optional<String> getSessionId() {
+    return sessionId;
+  }
+
+  public long getStartTime() {
+    return startTime;
+  }
+
+  public long getStopTime() {
+    return stopTime;
+  }
+
+  public long getDuration() {
+    Preconditions.checkState(isClosed);
+    return this.stopTime - this.startTime;
+  }
+
+  @Override
+  public void close() {
+    if (!isClosed) {
+      this.stopTime = System.nanoTime();
+      this.isClosed = true;
+      log.stop(this.clazz, this.action.getName(), extra, this.stopTime);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "PerfTimer [sessionId=" + sessionId + ", clazz=" + clazz
+        + ", action=" + action + ", extra=" + extra + ", startTime=" + startTime
+        + ", stopTime=" + stopTime + ", isClosed=" + isClosed + "]";
+  }
+
+}
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimerFactory.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimerFactory.java
new file mode 100644
index 0000000000..436111a3f9
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfTimerFactory.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.log;
+
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A factory for {@link PerfTimer} instances.
+ *
+ * Can be used to measure and log the time spent by a piece of code.
+ */
+public final class PerfTimerFactory {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(PerfTimerFactory.class);
+
+  private static final ConcurrentMap<Class<?>, AbstractPerfLogger> LOGGER_CACHE =
+      new ConcurrentHashMap<>();
+
+  private PerfTimerFactory() {
+  }
+
+  public static PerfTimer getPerfTimer(final Optional<String> sessionId,
+      final Optional<HiveConf> conf, final Class<?> clazz,
+      final PerfTimedAction action) {
+    return getPerfTimer(sessionId, conf, clazz, action, Optional.empty());
+  }
+
+  public static PerfTimer getPerfTimer(final Optional<String> sessionId,
+      final Optional<HiveConf> conf, final Class<?> clazz,
+      final PerfTimedAction action, final Optional<String> extra) {
+    Class<?> perfLoggerClass = MetricsPerfLogger.class;
+    try {
+      if (conf.isPresent()) {
+        perfLoggerClass = conf.get().getClassByName(
+            conf.get().getVar(HiveConf.ConfVars.HIVE_PERF_LOGGER_V2));
+      }
+    } catch (ClassNotFoundException e) {
+      LOG.error(
+          "Using class {}. Configured performance logger class not found: {}",
+          MetricsPerfLogger.class.getName(), e.getMessage());
+    }
+
+    final boolean isCacheable =
+        AbstractCachedPerfLogger.class.isAssignableFrom(perfLoggerClass);
+
+    final AbstractPerfLogger newPerfLogger;
+    if (isCacheable) {
+      newPerfLogger = LOGGER_CACHE.computeIfAbsent(perfLoggerClass, (key) -> {
+        return (AbstractPerfLogger) ReflectionUtils.newInstance(key,
+            conf.orElse(null));
+      });
+    } else {
+      newPerfLogger = (AbstractPerfLogger) ReflectionUtils
+          .newInstance(perfLoggerClass, conf.orElse(null));
+    }
+
+    return new PerfTimer(sessionId, clazz, action, extra, newPerfLogger);
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 91910d1c0c..e95bb9abb3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -22,7 +22,6 @@
 import java.io.DataInput;
 import java.io.IOException;
 import java.io.PrintStream;
-import java.io.Serializable;
 import java.net.InetAddress;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -35,6 +34,7 @@
 import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.tuple.ImmutablePair;
@@ -92,6 +92,8 @@
 import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.log.PerfTimedAction;
+import org.apache.hadoop.hive.ql.log.PerfTimer;
 import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -135,8 +137,7 @@ public class Driver implements IDriver {
-  static final private String CLASS_NAME = Driver.class.getName();
-  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+  private static final Logger LOG = LoggerFactory.getLogger(Driver.class);
   static final private LogHelper console = new LogHelper(LOG);
   private static final int SHUTDOWN_HOOK_PRIORITY = 0;
   private final QueryInfo queryInfo;
@@ -352,7 +353,7 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th
     createTransactionManager();
     PerfLogger perfLogger = SessionState.getPerfLogger();
-    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
+    perfLogger.PerfLogBegin(Driver.class.getName(), PerfLogger.COMPILE);
     driverState.compilingWithLocking();
     command = new VariableSubstitution(new HiveVariableSource() {
@@ -371,7 +372,7 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th LOG.warn("WARNING! Query command could not be redacted." + e); } - checkInterrupted("at beginning of compilation.", null, null); + checkInterrupted("at beginning of compilation.", null); if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) { // close the existing ctx etc before compiling a new query, but does not destroy driver @@ -413,7 +414,7 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th boolean parseError = false; try { - checkInterrupted("before parsing and analysing the query", null, null); + checkInterrupted("before parsing and analysing the query", null); if (ctx == null) { ctx = new Context(conf); @@ -425,7 +426,7 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th ctx.setCmd(command); ctx.setHDFSCleanup(true); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE); + perfLogger.PerfLogBegin(Driver.class.getName(), PerfLogger.PARSE); // Trigger query hook before compilation hookRunner.runBeforeParseHook(command); @@ -439,13 +440,13 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th } finally { hookRunner.runAfterParseHook(command, parseError); } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE); + perfLogger.PerfLogEnd(Driver.class.getName(), PerfLogger.PARSE); hookRunner.runBeforeCompileHook(command); // clear CurrentFunctionsInUse set, to capture new set of functions // that SemanticAnalyzer finds are in use SessionState.get().getCurrentFunctionsInUse().clear(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE); + perfLogger.PerfLogBegin(Driver.class.getName(), PerfLogger.ANALYZE); // Flush the metastore cache. This assures that we don't pick up objects from a previous // query running in this same thread. This has to be done after we get our semantic @@ -495,9 +496,9 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th // validate the plan sem.validate(); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE); + perfLogger.PerfLogEnd(Driver.class.getName(), PerfLogger.ANALYZE); - checkInterrupted("after analyzing query.", null, null); + checkInterrupted("after analyzing query.", null); // get the output schema schema = getSchema(sem, conf); @@ -519,8 +520,8 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - try { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION); + try (PerfTimer analyzeTimer = SessionState.getPerfTimer(Driver.class, + PerfTimedAction.DO_AUTHORIZATION)) { // Authorization check for kill query will be in KillQueryImpl // As both admin or operation owner can perform the operation. // Which is not directly supported in authorizer @@ -528,10 +529,10 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th CommandAuthorizer.doAuthorization(queryState.getHiveOperation(), sem, command); } } catch (AuthorizationException authExp) { - console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details."); - throw createProcessorException(403, authExp.getMessage(), "42000", null); - } finally { - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION); + console.printError("Authorization failed:" + authExp.getMessage() + + ". 
Use SHOW GRANT to get more details."); + throw createProcessorException(403, authExp.getMessage(), "42000", + null); } } @@ -551,7 +552,7 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th } catch (CommandProcessorException cpe) { throw cpe; } catch (Exception e) { - checkInterrupted("during query compilation: " + e.getMessage(), null, null); + checkInterrupted("during query compilation: " + e.getMessage(), null); compileError = true; ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage()); @@ -583,8 +584,10 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th LOG.warn("Failed when invoking query after-compilation hook.", e); } } + } - double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE)/1000.00; + final long duration = + perfLogger.PerfLogEnd(Driver.class.getName(), PerfLogger.COMPILE); ImmutableMap compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation"); queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings); @@ -593,13 +596,16 @@ public void compile(String command, boolean resetTaskIds, boolean deferClose) th closeInProcess(true); } - if (isInterrupted) { - driverState.compilationInterruptedWithLocking(deferClose); - LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds"); - } else { - driverState.compilationFinishedWithLocking(compileError); - LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds"); - } + if (isInterrupted) { + driverState.compilationInterruptedWithLocking(deferClose); + LOG.info( + "Compiling command(queryId={}) has been interrupted after {} seconds", + queryId, TimeUnit.MILLISECONDS.toSeconds(duration)); + } else { + driverState.compilationFinishedWithLocking(compileError); + LOG.info( + "Completed compiling command(queryId={}); Time taken: {} seconds", + queryId, TimeUnit.MILLISECONDS.toSeconds(duration)); } } @@ -825,14 +831,14 @@ private boolean startImplicitTxn(HiveTxnManager txnManager) throws LockException return shouldOpenImplicitTxn; } - private void checkInterrupted(String msg, HookContext hookContext, PerfLogger perfLogger) + private void checkInterrupted(String msg, HookContext hookContext) throws CommandProcessorException { if (driverState.isAborted()) { String errorMessage = "FAILED: command has been interrupted: " + msg; console.printError(errorMessage); if (hookContext != null) { try { - invokeFailureHooks(perfLogger, hookContext, errorMessage, null); + invokeFailureHooks(hookContext, errorMessage, null); } catch (Exception e) { LOG.warn("Caught exception attempting to invoke Failure Hooks", e); } @@ -1066,8 +1072,6 @@ private String getUserFromUGI() throws CommandProcessorException { * @throws CommandProcessorException **/ private void acquireLocks() throws CommandProcessorException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); if(!queryTxnMgr.isTxnOpen() && queryTxnMgr.supportsAcid()) { /*non acid txn managers don't support txns but fwd lock requests to lock managers @@ -1076,7 +1080,9 @@ private void acquireLocks() throws CommandProcessorException { which by definition needs no locks*/ return; } - try { + + try (PerfTimer acquireLocksTimer = SessionState.getPerfTimer(Driver.class, + PerfTimedAction.ACQUIRE_READ_WRITE_LOCKS)) { String userFromUGI = getUserFromUGI(); // Set the table write id in all of the acid file sinks @@ -1146,8 +1152,6 @@ private void 
acquireLocks() throws CommandProcessorException { String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); console.printError(errorMessage, "\n" + StringUtils.stringifyException(e)); throw createProcessorException(10, errorMessage, ErrorMsg.findSQLState(e.getMessage()), e); - } finally { - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); } } @@ -1163,46 +1167,43 @@ public void releaseLocksAndCommitOrRollback(boolean commit) throws LockException **/ @VisibleForTesting public void releaseLocksAndCommitOrRollback(boolean commit, HiveTxnManager txnManager) throws LockException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS); - HiveTxnManager txnMgr; - if (txnManager == null) { + try (PerfTimer releaseLocksTimer = SessionState.getPerfTimer(Driver.class, + PerfTimedAction.RELEASE_LOCKS)) { // Default to driver's txn manager if no txn manager specified - txnMgr = queryTxnMgr; - } else { - txnMgr = txnManager; - } - // If we've opened a transaction we need to commit or rollback rather than explicitly - // releasing the locks. - conf.unset(ValidTxnList.VALID_TXNS_KEY); - conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY); - if(!checkConcurrency()) { - return; - } - if (txnMgr.isTxnOpen()) { - if (commit) { - if(conf.getBoolVar(ConfVars.HIVE_IN_TEST) && conf.getBoolVar(ConfVars.HIVETESTMODEROLLBACKTXN)) { + final HiveTxnManager txnMgr = + (txnManager == null) ? queryTxnMgr : txnManager; + + // If we've opened a transaction we need to commit or rollback rather than + // explicitly releasing the locks. + conf.unset(ValidTxnList.VALID_TXNS_KEY); + conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY); + if (!checkConcurrency()) { + return; + } + if (txnMgr.isTxnOpen()) { + if (commit) { + if (conf.getBoolVar(ConfVars.HIVE_IN_TEST) + && conf.getBoolVar(ConfVars.HIVETESTMODEROLLBACKTXN)) { + txnMgr.rollbackTxn(); + } else { + // both commit & rollback clear ALL locks for this tx + txnMgr.commitTxn(); + } + } else { txnMgr.rollbackTxn(); } - else { - txnMgr.commitTxn();//both commit & rollback clear ALL locks for this tx - } } else { - txnMgr.rollbackTxn(); + // since there is no tx, we only have locks for current query (if any) + if (ctx != null && ctx.getHiveLocks() != null) { + hiveLocks.addAll(ctx.getHiveLocks()); + } + txnMgr.releaseLocks(hiveLocks); } - } else { - //since there is no tx, we only have locks for current query (if any) - if (ctx != null && ctx.getHiveLocks() != null) { - hiveLocks.addAll(ctx.getHiveLocks()); + hiveLocks.clear(); + if (ctx != null) { + ctx.setHiveLocks(null); } - txnMgr.releaseLocks(hiveLocks); - } - hiveLocks.clear(); - if (ctx != null) { - ctx.setHiveLocks(null); } - - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS); } /** @@ -1320,12 +1321,14 @@ private void compileInternal(String command, boolean deferClose) throws CommandP } PerfLogger perfLogger = SessionState.getPerfLogger(true); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.WAIT_COMPILE); - - try (CompileLock compileLock = CompileLockFactory.newInstance(conf, command)) { - boolean success = compileLock.tryAcquire(); + try (CompileLock compileLock = + CompileLockFactory.newInstance(conf, command)) { - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.WAIT_COMPILE); + final boolean success; + try (PerfTimer releaseLocksTimer = SessionState.getPerfTimer(Driver.class, + PerfTimedAction.WAIT_COMPILE)) { + success = compileLock.tryAcquire(); + } if (metrics != null) { 
metrics.decrementCounter(MetricsConstant.WAITING_COMPILE_OPS, 1); @@ -1405,7 +1408,7 @@ private void runInternal(String command, boolean alreadyCompiled) throws Command // same instance of Driver, which can run multiple queries. ctx.setHiveTxnManager(queryTxnMgr); - checkInterrupted("at acquiring the lock.", null, null); + checkInterrupted("at acquiring the lock.", null); lockAndRespond(); @@ -1635,37 +1638,37 @@ private void postExecutionCacheActions() throws Exception { } else if (cacheUsage.getStatus() == CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS && cacheUsage.getCacheEntry() != null && plan.getFetchTask() != null) { + // Save results to the cache for future queries to use. - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE); + try (PerfTimer compileTimer = SessionState.getPerfTimer(Driver.class, + PerfTimedAction.SAVE_TO_RESULTS_CACHE)) { - ValidTxnWriteIdList txnWriteIdList = null; - if (plan.hasAcidResourcesInQuery()) { - txnWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); - } - CacheEntry cacheEntry = cacheUsage.getCacheEntry(); - boolean savedToCache = QueryResultsCache.getInstance().setEntryValid( - cacheEntry, - plan.getFetchTask().getWork()); - LOG.info("savedToCache: {} ({})", savedToCache, cacheEntry); - if (savedToCache) { - useFetchFromCache(cacheUsage.getCacheEntry()); - // setEntryValid() already increments the reader count. Set usedCacheEntry so it gets released. - this.usedCacheEntry = cacheUsage.getCacheEntry(); + ValidTxnWriteIdList txnWriteIdList = null; + if (plan.hasAcidResourcesInQuery()) { + txnWriteIdList = AcidUtils.getValidTxnWriteIdList(conf); + } + CacheEntry cacheEntry = cacheUsage.getCacheEntry(); + boolean savedToCache = QueryResultsCache.getInstance() + .setEntryValid(cacheEntry, plan.getFetchTask().getWork()); + LOG.info("savedToCache: {} ({})", savedToCache, cacheEntry); + if (savedToCache) { + useFetchFromCache(cacheUsage.getCacheEntry()); + // setEntryValid() already increments the reader count. Set + // usedCacheEntry so it gets released. + this.usedCacheEntry = cacheUsage.getCacheEntry(); + } } - - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE); } } } private void execute() throws CommandProcessorException { PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE); + perfLogger.PerfLogBegin(Driver.class.getName(), PerfLogger.DRIVER_EXECUTE); boolean noName = Strings.isNullOrEmpty(conf.get(MRJobConfig.JOB_NAME)); - int maxlen; + final int maxlen; if ("spark".equals(conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE))) { maxlen = conf.getIntVar(HiveConf.ConfVars.HIVESPARKJOBNAMELENGTH); } else { @@ -1717,7 +1720,7 @@ private void execute() throws CommandProcessorException { SessionState ss = SessionState.get(); // TODO: should this use getUserFromAuthenticator? 
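Every PerfLogBegin/PerfLogEnd pair that this file converts follows the same shape: the begin call becomes a PerfTimer obtained from SessionState.getPerfTimer and opened in a try-with-resources statement, and the end call disappears because PerfTimer.close() reports the stop time even when the guarded code throws. A minimal before/after sketch, assuming SessionState.getPerfTimer(Class, PerfTimedAction) simply forwards the current session id and HiveConf to PerfTimerFactory.getPerfTimer (the helper is used throughout this patch but is not defined in this excerpt), with runTasks() standing in for the timed work:

    // Old pattern: paired calls; the end call has to be protected by a
    // hand-written finally block, or it is skipped when the timed code throws.
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(Driver.class.getName(), PerfLogger.RUN_TASKS);
    try {
      runTasks();
    } finally {
      perfLogger.PerfLogEnd(Driver.class.getName(), PerfLogger.RUN_TASKS);
    }

    // New pattern: the timer records the start time in its constructor and
    // reports the stop time from close(), which try-with-resources guarantees.
    try (PerfTimer timer = SessionState.getPerfTimer(Driver.class,
        PerfTimedAction.RUN_TASKS)) {
      runTasks();
    }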
- hookContext = new PrivateHookContext(plan, queryState, ctx.getPathToCS(), SessionState.get().getUserName(), + hookContext = new PrivateHookContext(plan, queryState, ctx.getPathToCS(), ss.getUserName(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger, queryInfo, ctx); hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK); @@ -1749,7 +1752,7 @@ private void execute() throws CommandProcessorException { // At any time, at most maxthreads tasks can be running // The main thread polls the TaskRunners to check if they have finished. - checkInterrupted("before running tasks.", hookContext, perfLogger); + checkInterrupted("before running tasks.", hookContext); DriverContext driverCxt = new DriverContext(ctx); driverCxt.prepare(plan); @@ -1775,7 +1778,7 @@ private void execute() throws CommandProcessorException { preExecutionCacheActions(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS); + perfLogger.PerfLogBegin(Driver.class.getName(), PerfLogger.RUN_TASKS); // Loop while you either have tasks running, or tasks queued up while (driverCxt.isRunning()) { // Launch upto maxthreads tasks @@ -1811,7 +1814,7 @@ private void execute() throws CommandProcessorException { TaskResult result = tskRun.getTaskResult(); int exitVal = result.getExitVal(); - checkInterrupted("when checking the execution result.", hookContext, perfLogger); + checkInterrupted("when checking the execution result.", hookContext); if (exitVal != 0) { Task backupTask = tsk.getAndInitBackupTask(); @@ -1832,7 +1835,7 @@ private void execute() throws CommandProcessorException { if (driverCxt.isShutdown()) { errorMessage = "FAILED: Operation cancelled. " + errorMessage; } - invokeFailureHooks(perfLogger, hookContext, + invokeFailureHooks(hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError()); String sqlState = "08S01"; @@ -1871,7 +1874,7 @@ private void execute() throws CommandProcessorException { } } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS); + perfLogger.PerfLogEnd(Driver.class.getName(), PerfLogger.RUN_TASKS); postExecutionCacheActions(); @@ -1881,7 +1884,7 @@ private void execute() throws CommandProcessorException { if (driverCxt.isShutdown()) { String errorMessage = "FAILED: Operation cancelled"; - invokeFailureHooks(perfLogger, hookContext, errorMessage, null); + invokeFailureHooks(hookContext, errorMessage, null); console.printError(errorMessage); throw createProcessorException(1000, errorMessage, "HY008", null); } @@ -1917,7 +1920,7 @@ private void execute() throws CommandProcessorException { } catch (Throwable e) { executionError = true; - checkInterrupted("during query execution: \n" + e.getMessage(), hookContext, perfLogger); + checkInterrupted("during query execution: \n" + e.getMessage(), hookContext); ctx.restoreOriginalTracker(); if (SessionState.get() != null) { @@ -1928,7 +1931,7 @@ private void execute() throws CommandProcessorException { String errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); if (hookContext != null) { try { - invokeFailureHooks(perfLogger, hookContext, errorMessage, e); + invokeFailureHooks(hookContext, errorMessage, e); } catch (Exception t) { LOG.warn("Failed to invoke failure hook", t); } @@ -1949,7 +1952,8 @@ private void execute() throws CommandProcessorException { if (noName) { conf.set(MRJobConfig.JOB_NAME, ""); } - double duration = perfLogger.PerfLogEnd(CLASS_NAME, 
PerfLogger.DRIVER_EXECUTE)/1000.00; + long duration = perfLogger.PerfLogEnd(Driver.class.getName(), + PerfLogger.DRIVER_EXECUTE); ImmutableMap executionHMSTimings = dumpMetaCallTimingWithoutEx("execution"); queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings); @@ -1982,9 +1986,13 @@ private void execute() throws CommandProcessorException { driverState.unlock(); } if (driverState.isAborted()) { - LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds"); + LOG.info( + "Executing command(queryId={}) has been interrupted after {} seconds", + queryId, TimeUnit.MILLISECONDS.toSeconds(duration)); } else { - LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds"); + LOG.info( + "Completed executing command(queryId={}); Time taken: {} seconds", + queryId, TimeUnit.MILLISECONDS.toSeconds(duration)); } } } @@ -2072,7 +2080,7 @@ private String getErrorMsgAndDetail(int exitVal, Throwable downstreamError, Task return errorMessage; } - private void invokeFailureHooks(PerfLogger perfLogger, + private void invokeFailureHooks( HookContext hookContext, String errorMessage, Throwable exception) throws Exception { hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK); hookContext.setErrorMessage(errorMessage); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 0643a54753..7e1a241025 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -57,7 +57,8 @@ import org.apache.hadoop.hive.ql.exec.tez.LlapObjectCache; import org.apache.hadoop.hive.ql.exec.tez.LlapObjectSubCache; import org.apache.hadoop.hive.ql.io.HiveKey; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; @@ -90,8 +91,6 @@ private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(MapJoinOperator.class.getName()); - private static final String CLASS_NAME = MapJoinOperator.class.getName(); - private transient final PerfLogger perfLogger = SessionState.getPerfLogger(); private transient String cacheKey; private transient ObjectCache cache; @@ -380,32 +379,32 @@ public void generateMapMetaData() throws HiveException { // Core logic to load hash table using HashTableLoader private Pair loadHashTableInternal( ExecMapperContext mapContext, MapredContext mrContext) throws HiveException { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE); - loader.init(mapContext, mrContext, hconf, this); - try { - loader.load(mapJoinTables, mapJoinTableSerdes); - } catch (HiveException e) { - if (LOG.isInfoEnabled()) { - LOG.info("Exception loading hash tables. 
Clearing partially loaded hash table containers."); - } - - // there could be some spilled partitions which needs to be cleaned up - clearAllTableContainers(); - throw e; - } - hashTblInitedOnce = true; + try (PerfTimer compileTimer = SessionState + .getPerfTimer(MapJoinOperator.class, PerfTimedAction.LOAD_HASHTABLE)) { + loader.init(mapContext, mrContext, hconf, this); + try { + loader.load(mapJoinTables, mapJoinTableSerdes); + } catch (HiveException e) { + LOG.info( + "Exception loading hash tables. Clearing partially loaded hash table containers."); + + // there could be some spilled partitions which needs to be cleaned up + clearAllTableContainers(); + throw e; + } - Pair pair = - new ImmutablePair<> (mapJoinTables, mapJoinTableSerdes); + hashTblInitedOnce = true; - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.LOAD_HASHTABLE); + Pair pair = + new ImmutablePair<>(mapJoinTables, mapJoinTableSerdes); - if (canSkipJoinProcessing(mapContext)) { - LOG.info("Skipping big table join processing for " + this.toString()); - this.setDone(true); + if (canSkipJoinProcessing(mapContext)) { + LOG.info("Skipping big table join processing for {}", this); + this.setDone(true); + } + return pair; } - return pair; } // Load Hash table for Bucket MapJoin diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 695d08bbe2..76b016a6b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -44,8 +44,9 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj; import org.apache.hadoop.hive.ql.lockmgr.LockException; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils; -import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -91,9 +92,8 @@ public MoveTask() { private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir) throws HiveException { - try { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES); + try (PerfTimer initTimer = SessionState.getPerfTimer(MoveTask.class, + PerfTimedAction.FILE_MOVES)) { String mesg = "Moving data to " + (isDfsDir ? 
"" : "local ") + "directory " + targetPath.toString(); @@ -108,8 +108,6 @@ private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir) FileSystem dstFs = FileSystem.getLocal(conf); moveFileFromDfsToLocal(sourcePath, targetPath, fs, dstFs); } - - perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); } catch (Exception e) { throw new HiveException("Unable to move source " + sourcePath + " to destination " + targetPath, e); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java index e205c08d84..5c9c020dfb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java @@ -46,7 +46,8 @@ import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -79,8 +80,8 @@ * Utilities related to serialization and deserialization. */ public class SerializationUtilities { - private static final String CLASS_NAME = SerializationUtilities.class.getName(); - private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + private static final Logger LOG = + LoggerFactory.getLogger(SerializationUtilities.class); public static class Hook { public boolean preRead(Class type) { return true; @@ -635,16 +636,18 @@ private static void serializePlan(Object plan, OutputStream out, boolean cloning } } - private static void serializePlan(Kryo kryo, Object plan, OutputStream out, boolean cloningPlan) { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN); - LOG.info("Serializing " + plan.getClass().getSimpleName() + " using kryo"); - if (cloningPlan) { - serializeObjectByKryo(kryo, plan, out); - } else { - serializeObjectByKryo(kryo, plan, out); + private static void serializePlan(Kryo kryo, Object plan, OutputStream out, + boolean cloningPlan) { + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SerializationUtilities.class, PerfTimedAction.SERIALIZE_PLAN)) { + LOG.info( + "Serializing " + plan.getClass().getSimpleName() + " using kryo"); + if (cloningPlan) { + serializeObjectByKryo(kryo, plan, out); + } else { + serializeObjectByKryo(kryo, plan, out); + } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SERIALIZE_PLAN); } /** @@ -675,17 +678,17 @@ private static void serializePlan(Kryo kryo, Object plan, OutputStream out, bool private static T deserializePlan(Kryo kryo, InputStream in, Class planClass, boolean cloningPlan) { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN); - T plan; - LOG.info("Deserializing " + planClass.getSimpleName() + " using kryo"); - if (cloningPlan) { - plan = deserializeObjectByKryo(kryo, in, planClass); - } else { - plan = deserializeObjectByKryo(kryo, in, planClass); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SerializationUtilities.class, PerfTimedAction.DESERIALIZE_PLAN)) { + final T plan; + LOG.info("Deserializing " + planClass.getSimpleName() + " using 
kryo"); + if (cloningPlan) { + plan = deserializeObjectByKryo(kryo, in, planClass); + } else { + plan = deserializeObjectByKryo(kryo, in, planClass); + } + return plan; } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DESERIALIZE_PLAN); - return plan; } /** @@ -695,20 +698,21 @@ private static void serializePlan(Kryo kryo, Object plan, OutputStream out, bool */ public static MapredWork clonePlan(MapredWork plan) { // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN); - Operator op = plan.getAnyOperator(); - CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); - serializePlan(plan, baos, true); - MapredWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), - MapredWork.class, true); - // Restore the context. - for (Operator newOp : newPlan.getAllOperators()) { - newOp.setCompilationOpContext(ctx); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SerializationUtilities.class, PerfTimedAction.CLONE_PLAN)) { + Operator op = plan.getAnyOperator(); + CompilationOpContext ctx = + (op == null) ? null : op.getCompilationOpContext(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializePlan(plan, baos, true); + MapredWork newPlan = deserializePlan( + new ByteArrayInputStream(baos.toByteArray()), MapredWork.class, true); + // Restore the context. + for (Operator newOp : newPlan.getAllOperators()) { + newOp.setCompilationOpContext(ctx); + } + return newPlan; } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN); - return newPlan; } /** @@ -746,20 +750,21 @@ public static MapredWork clonePlan(MapredWork plan) { * @return The clone. */ public static BaseWork cloneBaseWork(BaseWork plan) { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN); - Operator op = plan.getAnyRootOperator(); - CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); - serializePlan(plan, baos, true); - BaseWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), - plan.getClass(), true); - // Restore the context. - for (Operator newOp : newPlan.getAllOperators()) { - newOp.setCompilationOpContext(ctx); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SerializationUtilities.class, PerfTimedAction.CLONE_PLAN)) { + Operator op = plan.getAnyRootOperator(); + CompilationOpContext ctx = + (op == null) ? null : op.getCompilationOpContext(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializePlan(plan, baos, true); + BaseWork newPlan = deserializePlan( + new ByteArrayInputStream(baos.toByteArray()), plan.getClass(), true); + // Restore the context. 
+ for (Operator newOp : newPlan.getAllOperators()) { + newOp.setCompilationOpContext(ctx); + } + return newPlan; } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN); - return newPlan; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java index 10144a1352..efb017610f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java @@ -32,7 +32,8 @@ import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; @@ -47,9 +48,7 @@ public class SparkHashTableSinkOperator extends TerminalOperator implements Serializable { private static final long serialVersionUID = 1L; - private final String CLASS_NAME = this.getClass().getName(); - private final transient PerfLogger perfLogger = SessionState.getPerfLogger(); - protected static final Logger LOG = LoggerFactory.getLogger(SparkHashTableSinkOperator.class.getName()); + protected static final Logger LOG = LoggerFactory.getLogger(SparkHashTableSinkOperator.class); private static final String MAPRED_FILE_REPLICATION = "mapreduce.client.submit.file.replication"; private static final int DEFAULT_REPLICATION = 10; @@ -95,16 +94,12 @@ public void closeOp(boolean abort) throws HiveException { || mapJoinTables[tag] == null) { LOG.debug("mapJoinTable is null"); } else if (abort) { - if (LOG.isDebugEnabled()) { - LOG.debug("Aborting, skip dumping side-table for tag: " + tag); - } + LOG.debug("Aborting, skip dumping side-table for tag: {}", tag); } else { - String method = PerfLogger.SPARK_FLUSH_HASHTABLE + getName(); - perfLogger.PerfLogBegin(CLASS_NAME, method); - try { + try (PerfTimer compileTimer = + SessionState.getPerfTimer(SparkHashTableSinkOperator.class, + PerfTimedAction.SPARK_FLUSH_HASHTABLE)) { flushToFile(mapJoinTables[tag], tag); - } finally { - perfLogger.PerfLogEnd(CLASS_NAME, method); } } super.closeOp(abort); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index a7770b4e53..f3b04d6d7f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -32,7 +32,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.Serializable; import java.net.URI; import java.net.URL; import java.net.URLClassLoader; @@ -110,6 +109,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.DriverState; import org.apache.hadoop.hive.ql.QueryPlan; @@ -141,7 +141,8 @@ import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateMapper; import 
org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.HiveUtils; @@ -301,8 +302,7 @@ private Utilities() { private static GlobalWorkMapFactory gWorkMap = new GlobalWorkMapFactory(); - private static final String CLASS_NAME = Utilities.class.getName(); - private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + private static final Logger LOG = LoggerFactory.getLogger(Utilities.class); public static void clearWork(Configuration conf) { Path mapPath = getPlanPath(conf, MAP_PLAN_NAME); @@ -1435,7 +1435,6 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, FileSystem fs = specPath.getFileSystem(hconf); Path tmpPath = Utilities.toTempPath(specPath); Path taskTmpPath = Utilities.toTaskTempPath(specPath); - PerfLogger perfLogger = SessionState.getPerfLogger(); boolean isBlobStorage = BlobStorageUtils.isBlobStorageFileSystem(hconf, fs); boolean avoidRename = false; boolean shouldAvoidRename = shouldAvoidRename(conf, hconf); @@ -1452,32 +1451,36 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, Path tmpPathOriginal = tmpPath; tmpPath = new Path(tmpPath.getParent(), tmpPath.getName() + ".moved"); LOG.debug("shouldAvoidRename is false therefore moving/renaming " + tmpPathOriginal + " to " + tmpPath); - perfLogger.PerfLogBegin("FileSinkOperator", "rename"); - Utilities.rename(fs, tmpPathOriginal, tmpPath); - perfLogger.PerfLogEnd("FileSinkOperator", "rename"); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + FileSinkOperator.class, PerfTimedAction.RENAME_FILE)) { + Utilities.rename(fs, tmpPathOriginal, tmpPath); + } } // Remove duplicates from tmpPath List statusList = HiveStatsUtils.getFileStatusRecurse( tmpPath, ((dpCtx == null) ? 
1 : dpCtx.getNumDPCols()), fs); FileStatus[] statuses = statusList.toArray(new FileStatus[statusList.size()]); - if(statuses != null && statuses.length > 0) { - Set filesKept = new HashSet<>(); - perfLogger.PerfLogBegin("FileSinkOperator", "RemoveTempOrDuplicateFiles"); - // remove any tmp file or double-committed output files - List emptyBuckets = Utilities.removeTempOrDuplicateFiles( - fs, statuses, dpCtx, conf, hconf, filesKept, false); - perfLogger.PerfLogEnd("FileSinkOperator", "RemoveTempOrDuplicateFiles"); + if (statuses != null && statuses.length > 0) { + final Set filesKept = new HashSet<>(); + final List emptyBuckets; + try (PerfTimer compileTimer = SessionState.getPerfTimer( + FileSinkOperator.class, PerfTimedAction.REMOVE_TMP_DUP_FILES)) { + // remove any tmp file or double-committed output files + emptyBuckets = Utilities.removeTempOrDuplicateFiles( + fs, statuses, dpCtx, conf, hconf, filesKept, false); + } // create empty buckets if necessary if (!emptyBuckets.isEmpty()) { - perfLogger.PerfLogBegin("FileSinkOperator", "CreateEmptyBuckets"); - createEmptyBuckets( - hconf, emptyBuckets, conf.getCompressed(), conf.getTableInfo(), reporter); - for(Path p:emptyBuckets) { - FileStatus[] items = fs.listStatus(p); - filesKept.addAll(Arrays.asList(items)); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + FileSinkOperator.class, PerfTimedAction.CREATE_EMPTY_BUCKETS)) { + createEmptyBuckets(hconf, emptyBuckets, conf.getCompressed(), + conf.getTableInfo(), reporter); + for (Path p : emptyBuckets) { + FileStatus[] items = fs.listStatus(p); + filesKept.addAll(Arrays.asList(items)); + } } - perfLogger.PerfLogEnd("FileSinkOperator", "CreateEmptyBuckets"); } // move to the file destination @@ -1488,16 +1491,19 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, conf.getFilesToFetch().addAll(filesKept); } else if (conf !=null && conf.isCTASorCM() && isBlobStorage) { // for CTAS or Create MV statements - perfLogger.PerfLogBegin("FileSinkOperator", "moveSpecifiedFileStatus"); - LOG.debug("CTAS/Create MV: Files being renamed: " + filesKept.toString()); - Utilities.moveSpecifiedFileStatus(fs, tmpPath, specPath, filesKept); - perfLogger.PerfLogEnd("FileSinkOperator", "moveSpecifiedFileStatus"); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + FileSinkOperator.class, PerfTimedAction.MOVE_FILE_STATUS)) { + LOG.debug("CTAS/Create MV: Files being renamed: {}", filesKept); + Utilities.moveSpecifiedFileStatus(fs, tmpPath, specPath, filesKept); + } } else { // for rest of the statement e.g. INSERT, LOAD etc - perfLogger.PerfLogBegin("FileSinkOperator", "RenameOrMoveFiles"); - LOG.debug("Final renaming/moving. Source: " + tmpPath + " .Destination: " + specPath); - Utilities.renameOrMoveFiles(fs, tmpPath, specPath); - perfLogger.PerfLogEnd("FileSinkOperator", "RenameOrMoveFiles"); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + FileSinkOperator.class, PerfTimedAction.RENAME_MOVE_FILES)) { + LOG.debug("Final renaming/moving. 
Source: [{}] Destination: [{}]", + tmpPath, specPath); + Utilities.renameOrMoveFiles(fs, tmpPath, specPath); + } } } } else { @@ -2344,53 +2350,56 @@ static int getMaxExecutorsForInputListing(final Configuration conf, int inputLoc */ public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter) throws IOException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY); final long[] summary = {0L, 0L, 0L}; final Set pathNeedProcess = new HashSet<>(); - // Since multiple threads could call this method concurrently, locking - // this method will avoid number of threads out of control. - synchronized (INPUT_SUMMARY_LOCK) { - // For each input path, calculate the total size. - for (final Path path : work.getPathToAliases().keySet()) { - if (path == null) { - continue; - } - if (filter != null && !filter.accept(path)) { - continue; - } + try (PerfTimer compileTimer = SessionState + .getPerfTimer(FileSinkOperator.class, PerfTimedAction.INPUT_SUMMARY)) { - ContentSummary cs = ctx.getCS(path); - if (cs != null) { - summary[0] += cs.getLength(); - summary[1] += cs.getFileCount(); - summary[2] += cs.getDirectoryCount(); - } else { - pathNeedProcess.add(path); + // Since multiple threads could call this method concurrently, locking + // this method will avoid number of threads out of control. + synchronized (INPUT_SUMMARY_LOCK) { + // For each input path, calculate the total size. + for (final Path path : work.getPathToAliases().keySet()) { + if (path == null) { + continue; + } + if (filter != null && !filter.accept(path)) { + continue; + } + + ContentSummary cs = ctx.getCS(path); + if (cs != null) { + summary[0] += cs.getLength(); + summary[1] += cs.getFileCount(); + summary[2] += cs.getDirectoryCount(); + } else { + pathNeedProcess.add(path); + } } - } - // Process the case when name node call is needed - final ExecutorService executor; + // Process the case when name node call is needed + final ExecutorService executor; - int numExecutors = getMaxExecutorsForInputListing(ctx.getConf(), pathNeedProcess.size()); - if (numExecutors > 1) { - LOG.info("Using {} threads for getContentSummary", numExecutors); - executor = Executors.newFixedThreadPool(numExecutors, - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("Get-Input-Summary-%d").build()); - } else { - LOG.info("Not using thread pool for getContentSummary"); - executor = MoreExecutors.newDirectExecutorService(); + int numExecutors = getMaxExecutorsForInputListing(ctx.getConf(), + pathNeedProcess.size()); + if (numExecutors > 1) { + LOG.info("Using {} threads for getContentSummary", numExecutors); + executor = Executors.newFixedThreadPool(numExecutors, + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("Get-Input-Summary-%d").build()); + } else { + LOG.info("Not using thread pool for getContentSummary"); + executor = MoreExecutors.newDirectExecutorService(); + } + getInputSummaryWithPool(ctx, + Collections.unmodifiableSet(pathNeedProcess), work, summary, + executor); } - getInputSummaryWithPool(ctx, Collections.unmodifiableSet(pathNeedProcess), - work, summary, executor); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY); } - return new ContentSummary.Builder().length(summary[0]) - .fileCount(summary[1]).directoryCount(summary[2]).build(); + return new ContentSummary.Builder().length(summary[0]).fileCount(summary[1]) + .directoryCount(summary[2]).build(); } /** @@ -3262,102 +3271,106 @@ public static String 
getVertexCounterName(String counter, String vertexName) { * @return List of paths to process for the given MapWork * @throws Exception */ - public static List getInputPaths(JobConf job, MapWork work, Path hiveScratchDir, - Context ctx, boolean skipDummy) throws Exception { - - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_PATHS); + public static List getInputPaths(JobConf job, MapWork work, + Path hiveScratchDir, Context ctx, boolean skipDummy) throws Exception { - Set pathsProcessed = new HashSet(); - List pathsToAdd = new LinkedList(); - DriverState driverState = DriverState.getDriverState(); - // AliasToWork contains all the aliases - Collection aliasToWork = work.getAliasToWork().keySet(); - if (!skipDummy) { - // ConcurrentModification otherwise if adding dummy. - aliasToWork = new ArrayList<>(aliasToWork); - } - for (String alias : aliasToWork) { - LOG.info("Processing alias {}", alias); + try (PerfTimer compileTimer = + SessionState.getPerfTimer(Driver.class, PerfTimedAction.INPUT_PATHS)) { - // The alias may not have any path - Collection>> pathToAliases = work.getPathToAliases().entrySet(); + Set pathsProcessed = new HashSet(); + List pathsToAdd = new LinkedList(); + DriverState driverState = DriverState.getDriverState(); + // AliasToWork contains all the aliases + Collection aliasToWork = work.getAliasToWork().keySet(); if (!skipDummy) { // ConcurrentModification otherwise if adding dummy. - pathToAliases = new ArrayList<>(pathToAliases); + aliasToWork = new ArrayList<>(aliasToWork); } - boolean isEmptyTable = true; - boolean hasLogged = false; - - for (Map.Entry> e : pathToAliases) { - if (driverState != null && driverState.isAborted()) { - throw new IOException("Operation is Canceled."); + for (String alias : aliasToWork) { + LOG.info("Processing alias {}", alias); + + // The alias may not have any path + Collection>> pathToAliases = + work.getPathToAliases().entrySet(); + if (!skipDummy) { + // ConcurrentModification otherwise if adding dummy. 
+ pathToAliases = new ArrayList<>(pathToAliases); } + boolean isEmptyTable = true; + boolean hasLogged = false; - Path file = e.getKey(); - List aliases = e.getValue(); - if (aliases.contains(alias)) { - if (file != null) { - isEmptyTable = false; - } else { - LOG.warn("Found a null path for alias {}", alias); - continue; + for (Map.Entry> e : pathToAliases) { + if (driverState != null && driverState.isAborted()) { + throw new IOException("Operation is Canceled."); } - // Multiple aliases can point to the same path - it should be - // processed only once - if (pathsProcessed.contains(file)) { - continue; - } + Path file = e.getKey(); + List aliases = e.getValue(); + if (aliases.contains(alias)) { + if (file != null) { + isEmptyTable = false; + } else { + LOG.warn("Found a null path for alias {}", alias); + continue; + } + + // Multiple aliases can point to the same path - it should be + // processed only once + if (pathsProcessed.contains(file)) { + continue; + } + + StringInternUtils.internUriStringsInPath(file); + pathsProcessed.add(file); + LOG.debug("Adding input file {}", file); + if (!hasLogged) { + hasLogged = true; + LOG.info("Adding {} inputs; the first input is {}", + work.getPathToAliases().size(), file); + } - StringInternUtils.internUriStringsInPath(file); - pathsProcessed.add(file); - LOG.debug("Adding input file {}", file); - if (!hasLogged) { - hasLogged = true; - LOG.info("Adding {} inputs; the first input is {}", - work.getPathToAliases().size(), file); + pathsToAdd.add(file); } + } - pathsToAdd.add(file); + // If the query references non-existent partitions + // We need to add a empty file, it is not acceptable to change the + // operator tree + // Consider the query: + // select * from (select count(1) from T union all select count(1) from + // T2) x; + // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2 + // rows) + if (isEmptyTable && !skipDummy) { + pathsToAdd.add( + createDummyFileForEmptyTable(job, work, hiveScratchDir, alias)); } } - // If the query references non-existent partitions - // We need to add a empty file, it is not acceptable to change the - // operator tree - // Consider the query: - // select * from (select count(1) from T union all select count(1) from - // T2) x; - // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2 - // rows) - if (isEmptyTable && !skipDummy) { - pathsToAdd.add(createDummyFileForEmptyTable(job, work, hiveScratchDir, alias)); - } - } + List finalPathsToAdd = new LinkedList<>(); - List finalPathsToAdd = new LinkedList<>(); - - int numExecutors = getMaxExecutorsForInputListing(job, pathsToAdd.size()); - if (numExecutors > 1) { - ExecutorService pool = Executors.newFixedThreadPool(numExecutors, - new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Paths-%d").build()); + int numExecutors = getMaxExecutorsForInputListing(job, pathsToAdd.size()); + if (numExecutors > 1) { + ExecutorService pool = Executors.newFixedThreadPool(numExecutors, + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("Get-Input-Paths-%d").build()); - finalPathsToAdd.addAll(getInputPathsWithPool(job, work, hiveScratchDir, ctx, skipDummy, pathsToAdd, pool)); - } else { - for (final Path path : pathsToAdd) { - if (driverState != null && driverState.isAborted()) { - throw new IOException("Operation is Canceled."); + finalPathsToAdd.addAll(getInputPathsWithPool(job, work, hiveScratchDir, + ctx, skipDummy, pathsToAdd, pool)); + } else { + for (final Path path : pathsToAdd) { + if (driverState != null && 
driverState.isAborted()) { + throw new IOException("Operation is Canceled."); + } + Path newPath = new GetInputPathsCallable(path, job, work, + hiveScratchDir, ctx, skipDummy).call(); + updatePathForMapWork(newPath, work, path); + finalPathsToAdd.add(newPath); } - Path newPath = new GetInputPathsCallable(path, job, work, hiveScratchDir, ctx, skipDummy).call(); - updatePathForMapWork(newPath, work, path); - finalPathsToAdd.add(newPath); } - } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_PATHS); - - return finalPathsToAdd; + return finalPathsToAdd; + } } @VisibleForTesting diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java index b9285accbd..47b85463de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java @@ -31,7 +31,6 @@ import java.util.Set; import com.google.common.base.Preconditions; -import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.slf4j.Logger; @@ -41,6 +40,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -66,9 +67,7 @@ public class SparkDynamicPartitionPruner { private static final Logger LOG = LoggerFactory.getLogger(SparkDynamicPartitionPruner.class); - private static final String CLASS_NAME = SparkDynamicPartitionPruner.class.getName(); - private final PerfLogger perfLogger = SessionState.getPerfLogger(); private final Map> sourceInfoMap = new LinkedHashMap>(); private final BytesWritable writable = new BytesWritable(); @@ -79,12 +78,13 @@ public void prune(MapWork work, JobConf jobConf) throws HiveException, SerDeExce // Nothing to prune for this MapWork return; } - perfLogger.PerfLogBegin(CLASS_NAME, - PerfLogger.SPARK_DYNAMICALLY_PRUNE_PARTITIONS + work.getName()); - processFiles(work, jobConf); - prunePartitions(work); - perfLogger.PerfLogBegin(CLASS_NAME, - PerfLogger.SPARK_DYNAMICALLY_PRUNE_PARTITIONS + work.getName()); + + try (PerfTimer initTimer = SessionState.getPerfTimer( + SparkDynamicPartitionPruner.class, + PerfTimedAction.SPARK_DYNAMICALLY_PRUNE_PARTITIONS, work.getName())) { + processFiles(work, jobConf); + prunePartitions(work); + } } public void initialize(MapWork work, JobConf jobConf) throws SerDeException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java index 530131f207..423b28b88d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -36,7 +37,8 @@ import 
org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -63,8 +65,16 @@ private ExecMapperContext execContext; @Override - public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); + public void init(JobConf job, OutputCollector output, + Reporter reporter) throws Exception { + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SparkRecordHandler.class, PerfTimedAction.SPARK_INIT_OPERATORS)) { + doInit(job, output, reporter); + } + } + + protected void doInit(JobConf job, OutputCollector output, + Reporter reporter) throws Exception { super.init(job, output, reporter); try { @@ -124,7 +134,6 @@ throw new RuntimeException("Map operator initialization failed: " + e, e); } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java index 8244dcb1a9..778d04fc2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlan.java @@ -31,7 +31,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExplainTask; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.mapred.JobConf; @@ -40,7 +39,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.io.HiveKey; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.io.BytesWritable; import org.apache.spark.api.java.JavaPairRDD; @@ -49,9 +49,7 @@ @SuppressWarnings("rawtypes") public class SparkPlan { - private static final String CLASS_NAME = SparkPlan.class.getName(); private static final Logger LOG = LoggerFactory.getLogger(SparkPlan.class); - private final PerfLogger perfLogger = SessionState.getPerfLogger(); private final Set rootTrans = new HashSet(); private final Set leafTrans = new HashSet(); @@ -67,9 +65,15 @@ this.sc = sc; } - @SuppressWarnings("unchecked") public JavaPairRDD generateGraph() { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_RDD_GRAPH); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SparkPlan.class, PerfTimedAction.SPARK_BUILD_RDD_GRAPH)) { + return doGenerateGraph(); + } + } + + @SuppressWarnings("unchecked") + protected JavaPairRDD doGenerateGraph() { Map> tranToOutputRDDMap = new HashMap>(); for (SparkTran tran : getAllTrans()) { @@ -113,8 +117,6 @@ } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_RDD_GRAPH); - LOG.info("\n\nSpark RDD Graph:\n\n" + finalRDD.toDebugString() + "\n"); return finalRDD; @@ -134,7 +136,6 @@ private String getLongFormCallSite(SparkTran tran) { if 
(this.jobConf.getBoolean(HiveConf.ConfVars.HIVE_SPARK_LOG_EXPLAIN_WEBUI.varname, HiveConf .ConfVars.HIVE_SPARK_LOG_EXPLAIN_WEBUI.defaultBoolVal)) { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_EXPLAIN_PLAN + tran.getName()); ExplainWork explainWork = new ExplainWork(); explainWork.setConfig(new ExplainConfiguration()); @@ -142,7 +143,8 @@ private String getLongFormCallSite(SparkTran tran) { explainTask.setWork(explainWork); String explainOutput = ""; - try { + try (PerfTimer compileTimer = SessionState.getPerfTimer(SparkPlan.class, + PerfTimedAction.SPARK_CREATE_EXPLAIN_PLAN)) { ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); explainTask.outputPlan(tran.getBaseWork(), new PrintStream(outputStream), false, false, 0, null, this.jobConf.getBoolean(HiveConf.ConfVars.HIVE_IN_TEST.varname, @@ -154,7 +156,6 @@ private String getLongFormCallSite(SparkTran tran) { LOG.error("Error while generating explain plan for " + tran.getName(), e); } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_EXPLAIN_PLAN + tran.getName()); return explainOutput; } return ""; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java index 806deb5f31..c177ae6f64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java @@ -18,19 +18,13 @@ package org.apache.hadoop.hive.ql.exec.spark; -import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.net.MalformedURLException; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import org.apache.hive.spark.client.SparkClientUtilities; -import org.apache.spark.SparkConf; import org.apache.spark.util.CallSite; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +35,8 @@ import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper; import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.hive.conf.HiveConf; @@ -75,10 +70,8 @@ @SuppressWarnings("rawtypes") public class SparkPlanGenerator { - private static final String CLASS_NAME = SparkPlanGenerator.class.getName(); private static final Logger LOG = LoggerFactory.getLogger(SparkPlanGenerator.class); - private final PerfLogger perfLogger = SessionState.getPerfLogger(); private final JavaSparkContext sc; private final JobConf jobConf; private final Context context; @@ -114,33 +107,34 @@ public SparkPlanGenerator( } public SparkPlan generate(SparkWork sparkWork) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN); - SparkPlan sparkPlan = new SparkPlan(this.jobConf, this.sc.sc()); - cloneToWork = sparkWork.getCloneToWork(); - workToTranMap.clear(); - workToParentWorkTranMap.clear(); + try (PerfTimer generateTimer = SessionState.getPerfTimer( + SparkPlanGenerator.class, PerfTimedAction.SPARK_BUILD_PLAN)) { + SparkPlan sparkPlan = new SparkPlan(this.jobConf, this.sc.sc()); + cloneToWork = sparkWork.getCloneToWork(); + 
workToTranMap.clear(); + workToParentWorkTranMap.clear(); - try { for (BaseWork work : sparkWork.getAllWork()) { - // Run the SparkDynamicPartitionPruner, we run this here instead of inside the - // InputFormat so that we don't have to run pruning when creating a Record Reader - runDynamicPartitionPruner(work); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName()); - SparkTran tran = generate(work, sparkWork); - SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work); - sparkPlan.addTran(tran); - sparkPlan.connect(parentTran, tran); - workToTranMap.put(work, tran); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName()); + try (PerfTimer tranTimer = + SessionState.getPerfTimer(SparkPlanGenerator.class, + PerfTimedAction.SPARK_CREATE_TRAN, work.getName())) { + // Run the SparkDynamicPartitionPruner, we run this here instead of + // inside the InputFormat so that we don't have to run pruning when + // creating a Record Reader + runDynamicPartitionPruner(work); + SparkTran tran = generate(work, sparkWork); + SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work); + sparkPlan.addTran(tran); + sparkPlan.connect(parentTran, tran); + workToTranMap.put(work, tran); + } } + return sparkPlan; } finally { // clear all ThreadLocal cached MapWork/ReduceWork after plan generation // as this may executed in a pool thread. Utilities.clearWorkMap(jobConf); } - - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN); - return sparkPlan; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java index f29a9f807c..64c6572187 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java @@ -29,8 +29,6 @@ import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.log.PerfLogger; -import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; @@ -41,8 +39,6 @@ public abstract class SparkRecordHandler { - protected static final String CLASS_NAME = SparkRecordHandler.class.getName(); - protected final PerfLogger perfLogger = SessionState.getPerfLogger(); private static final Logger LOG = LoggerFactory.getLogger(SparkRecordHandler.class); // used to log memory usage periodically diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 07cb5cb936..2785bf014e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -37,12 +37,14 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import 
org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -117,9 +119,16 @@ private MapredLocalWork localWork = null; @Override + public void init(JobConf job, OutputCollector output, Reporter reporter) + throws Exception { + try (PerfTimer compileTimer = SessionState.getPerfTimer( + SparkRecordHandler.class, PerfTimedAction.SPARK_INIT_OPERATORS)) { + doInit(job, output, reporter); + } + } + @SuppressWarnings("unchecked") - public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); + protected void doInit(JobConf job, OutputCollector output, Reporter reporter) throws Exception { super.init(job, output, reporter); rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE]; @@ -253,7 +262,6 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws throw new RuntimeException("Reduce operator initialization failed", e); } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index 92775107bc..3d92dc13a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -67,6 +67,8 @@ import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus; import org.apache.hadoop.hive.ql.exec.spark.status.SparkStageProgress; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; @@ -82,8 +84,7 @@ import com.google.common.collect.Lists; public class SparkTask extends Task { - private static final String CLASS_NAME = SparkTask.class.getName(); - private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + private static final Logger LOG = LoggerFactory.getLogger(SparkTask.class); private static final LogHelper console = new LogHelper(LOG); private PerfLogger perfLogger; private static final long serialVersionUID = 1L; @@ -125,10 +126,10 @@ public int execute(DriverContext driverContext) { sparkWork.setRequiredCounterPrefix(getOperatorCounters()); // Submit the Spark job - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB); - submitTime = perfLogger.getStartTime(PerfLogger.SPARK_SUBMIT_JOB); - jobRef = sparkSession.submit(driverContext, sparkWork); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB); + try (PerfTimer compileTimer = SessionState.getPerfTimer(SparkTask.class, + PerfTimedAction.SPARK_SUBMIT_JOB)) { + jobRef = sparkSession.submit(driverContext, sparkWork); + } // If the driver context has been shutdown (due to query cancellation) kill the Spark job if (driverContext.isShutdown()) { @@ -143,7 +144,7 @@ public int execute(DriverContext driverContext) { // Add Spark job handle id to the Hive History addToHistory(Keys.SPARK_JOB_HANDLE_ID, jobRef.getJobId()); - LOG.debug("Starting Spark job with job handle id " + sparkJobHandleId); + LOG.debug("Starting Spark job with job handle id {}", 
sparkJobHandleId); // Get the application id of the Spark app jobID = jobRef.getSparkJobStatus().getAppID(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java index 8c9d53f521..c7e8552f8f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.llap.LlapUtil; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -55,7 +56,8 @@ import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; import org.apache.hadoop.hive.ql.exec.tez.tools.KeyValueInputMerger; import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.DynamicValue; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -116,7 +118,8 @@ private void setLlapOfFragmentId(final ProcessorContext context) { @Override void init(MRTaskReporter mrReporter, Map inputs, Map outputs) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); + final PerfTimer perfTimer = SessionState.getPerfTimer( + RecordProcessor.class, PerfTimedAction.TEZ_INIT_OPERATORS); super.init(mrReporter, inputs, outputs); checkAbortCondition(); @@ -340,7 +343,7 @@ void init(MRTaskReporter mrReporter, throw new RuntimeException("Map operator initialization failed", e); } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); + perfTimer.close(); } private void initializeMapRecordSources() throws Exception { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java index 13f5f12989..151388dd8e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java @@ -33,9 +33,11 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapper; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; import org.apache.tez.mapreduce.input.MRInputLegacy; @@ -74,75 +76,82 @@ void init( MRTaskReporter mrReporter, Map inputs, Map outputs) throws Exception { // TODO HIVE-14042. Abort handling. 
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); - super.init(mrReporter, inputs, outputs); - execContext = new ExecMapperContext(jconf); - - //Update JobConf using MRInput, info like filename comes via this - mrInput = getMRInput(inputs); - Configuration updatedConf = mrInput.getConfigUpdates(); - if (updatedConf != null) { - for (Map.Entry entry : updatedConf) { - jconf.set(entry.getKey(), entry.getValue()); + + try ( + PerfTimer runJobTimer = SessionState.getPerfTimer(RecordProcessor.class, + PerfTimedAction.TEZ_INIT_OPERATORS)) { + + super.init(mrReporter, inputs, outputs); + execContext = new ExecMapperContext(jconf); + + // Update JobConf using MRInput, info like filename comes via this + mrInput = getMRInput(inputs); + Configuration updatedConf = mrInput.getConfigUpdates(); + if (updatedConf != null) { + for (Map.Entry entry : updatedConf) { + jconf.set(entry.getKey(), entry.getValue()); + } + } + createOutputMap(); + // Start all the Outputs. + for (Map.Entry outputEntry : outputs.entrySet()) { + outputEntry.getValue().start(); + ((TezProcessor.TezKVOutputCollector) outMap.get(outputEntry.getKey())) + .initialize(); } - } - createOutputMap(); - // Start all the Outputs. - for (Map.Entry outputEntry : outputs.entrySet()) { - outputEntry.getValue().start(); - ((TezProcessor.TezKVOutputCollector) outMap.get(outputEntry.getKey())) - .initialize(); - } - String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID); - cache = ObjectCacheFactory.getCache(jconf, queryId, true); + String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID); + cache = ObjectCacheFactory.getCache(jconf, queryId, true); - try { - execContext.setJc(jconf); + try { + execContext.setJc(jconf); - cacheKey = MAP_PLAN_KEY; + cacheKey = MAP_PLAN_KEY; - MapWork mapWork = (MapWork) cache.retrieve(cacheKey, new Callable() { - @Override - public Object call() { - return Utilities.getMapWork(jconf); - } - }); - Utilities.setMapWork(jconf, mapWork); + MapWork mapWork = + (MapWork) cache.retrieve(cacheKey, new Callable() { + @Override + public Object call() { + return Utilities.getMapWork(jconf); + } + }); + Utilities.setMapWork(jconf, mapWork); - if (mapWork instanceof MergeFileWork) { - mfWork = (MergeFileWork) mapWork; - } else { - throw new RuntimeException("MapWork should be an instance of MergeFileWork."); - } + if (mapWork instanceof MergeFileWork) { + mfWork = (MergeFileWork) mapWork; + } else { + throw new RuntimeException( + "MapWork should be an instance of MergeFileWork."); + } - String alias = mfWork.getAliasToWork().keySet().iterator().next(); - mergeOp = mfWork.getAliasToWork().get(alias); - LOG.info(mergeOp.dump(0)); + String alias = mfWork.getAliasToWork().keySet().iterator().next(); + mergeOp = mfWork.getAliasToWork().get(alias); + LOG.info(mergeOp.dump(0)); - MapredContext.init(true, new JobConf(jconf)); - ((TezContext) MapredContext.get()).setInputs(inputs); - mergeOp.passExecContext(execContext); - mergeOp.initializeLocalWork(jconf); - mergeOp.initialize(jconf, null); + MapredContext.init(true, new JobConf(jconf)); + ((TezContext) MapredContext.get()).setInputs(inputs); + mergeOp.passExecContext(execContext); + mergeOp.initializeLocalWork(jconf); + mergeOp.initialize(jconf, null); - OperatorUtils.setChildrenCollector(mergeOp.getChildOperators(), outMap); - mergeOp.setReporter(reporter); - MapredContext.get().setReporter(reporter); - } catch (Throwable e) { - if (e instanceof OutOfMemoryError) { - // will this be true here? 
- // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; - } else if (e instanceof InterruptedException) { - LOG.info("Hit an interrupt while initializing MergeFileRecordProcessor. Message={}", - e.getMessage()); - throw (InterruptedException) e; - } else { - throw new RuntimeException("Map operator initialization failed", e); + OperatorUtils.setChildrenCollector(mergeOp.getChildOperators(), outMap); + mergeOp.setReporter(reporter); + MapredContext.get().setReporter(reporter); + } catch (Throwable e) { + if (e instanceof OutOfMemoryError) { + // will this be true here? + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; + } else if (e instanceof InterruptedException) { + LOG.info( + "Hit an interrupt while initializing MergeFileRecordProcessor. Message={}", + e.getMessage()); + throw (InterruptedException) e; + } else { + throw new RuntimeException("Map operator initialization failed", e); + } } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java index 6697f62d13..9d72a0e6df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.ql.exec.ObjectCache; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; -import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.session.SessionState; @@ -55,9 +54,6 @@ protected MRTaskReporter reporter; - protected PerfLogger perfLogger = SessionState.getPerfLogger(); - protected String CLASS_NAME = RecordProcessor.class.getName(); - public RecordProcessor(JobConf jConf, ProcessorContext processorContext) { this.jconf = jConf; this.processorContext = processorContext; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 03edbf7bdb..b549f0b996 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -41,11 +41,13 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; import org.apache.hadoop.hive.ql.exec.tez.DynamicValueRegistryTez.RegistryConfTez; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.DynamicValue; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.mapred.JobConf; import org.apache.tez.mapreduce.processor.MRTaskReporter; @@ -99,9 +101,17 @@ public ReduceRecordProcessor(final JobConf jconf, final ProcessorContext context } @Override - void init(MRTaskReporter mrReporter, Map inputs, Map outputs) + void init(MRTaskReporter mrReporter, Map inputs, + Map 
outputs) throws Exception { + try (PerfTimer initTimer = SessionState.getPerfTimer(RecordProcessor.class, + PerfTimedAction.TEZ_INIT_OPERATORS)) { + doInit(mrReporter, inputs, outputs); + } + } + + void doInit(MRTaskReporter mrReporter, Map inputs, Map outputs) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); + super.init(mrReporter, inputs, outputs); MapredContext.init(false, new JobConf(jconf)); @@ -240,8 +250,6 @@ void init(MRTaskReporter mrReporter, Map inputs, Map valueWritables; private final GroupIterator groupIterator = new GroupIterator(); @@ -234,7 +227,6 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT throw new RuntimeException("Reduce operator initialization failed", e); } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java index fa6160fe3c..0d5bd982e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java @@ -31,7 +31,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; @@ -70,9 +71,6 @@ protected JobConf jobConf; - private static final String CLASS_NAME = TezProcessor.class.getName(); - private final PerfLogger perfLogger = SessionState.getPerfLogger(); - // TODO: Replace with direct call to ProgressHelper, when reliably available. 
private static class ReflectiveProgressHelper { @@ -172,16 +170,18 @@ public void handleEvents(List arg0) { @Override public void initialize() throws IOException { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INITIALIZE_PROCESSOR); - Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); - this.jobConf = new JobConf(conf); - this.processorContext = getContext(); - ExecutionContext execCtx = processorContext.getExecutionContext(); - if (execCtx instanceof Hook) { - ((Hook)execCtx).initializeHook(this); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezProcessor.class, + PerfTimedAction.TEZ_INITIALIZE_PROCESSOR)) { + Configuration conf = + TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + this.jobConf = new JobConf(conf); + this.processorContext = getContext(); + ExecutionContext execCtx = processorContext.getExecutionContext(); + if (execCtx instanceof Hook) { + ((Hook) execCtx).initializeHook(this); + } + setupMRLegacyConfigs(processorContext); } - setupMRLegacyConfigs(processorContext); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INITIALIZE_PROCESSOR); } private void setupMRLegacyConfigs(ProcessorContext processorContext) { @@ -216,38 +216,44 @@ public void run(Map inputs, Map out return; } - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR); - // in case of broadcast-join read the broadcast edge inputs - // (possibly asynchronously) + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezProcessor.class, + PerfTimedAction.TEZ_RUN_PROCESSOR)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Running task: " + getContext().getUniqueIdentifier()); - } + // in case of broadcast-join read the broadcast edge inputs + // (possibly asynchronously) - synchronized (this) { - // This check isn't absolutely mandatory, given the aborted check outside of the - // Processor creation. - if (aborted.get()) { - return; + if (LOG.isDebugEnabled()) { + LOG.debug("Running task: " + getContext().getUniqueIdentifier()); } - // leverage TEZ-3437: Improve synchronization and the progress report behavior. - progressHelper = new ReflectiveProgressHelper(jobConf, inputs, getContext(), this.getClass().getSimpleName()); - + synchronized (this) { + // This check isn't absolutely mandatory, given the aborted check + // outside of the + // Processor creation. + if (aborted.get()) { + return; + } - // There should be no blocking operation in RecordProcessor creation, - // otherwise the abort operation will not register since they are synchronized on the same - // lock. - if (isMap) { - rproc = new MapRecordProcessor(jobConf, getContext()); - } else { - rproc = new ReduceRecordProcessor(jobConf, getContext()); + // leverage TEZ-3437: Improve synchronization and the progress report + // behavior. + progressHelper = new ReflectiveProgressHelper(jobConf, inputs, + getContext(), this.getClass().getSimpleName()); + + // There should be no blocking operation in RecordProcessor creation, + // otherwise the abort operation will not register since they are + // synchronized on the same + // lock. + if (isMap) { + rproc = new MapRecordProcessor(jobConf, getContext()); + } else { + rproc = new ReduceRecordProcessor(jobConf, getContext()); + } } - } - progressHelper.scheduleProgressTaskService(0, 100); - if (!aborted.get()) { - initializeAndRunProcessor(inputs, outputs); + progressHelper.scheduleProgressTaskService(0, 100); + if (!aborted.get()) { + initializeAndRunProcessor(inputs, outputs); + } } // TODO HIVE-14042. 
In case of an abort request, throw an InterruptedException } @@ -267,7 +273,6 @@ protected void initializeAndRunProcessor(Map inputs, rproc.run(); //done - output does not need to be committed as hive does not use outputcommitter - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR); } catch (Throwable t) { originalThrowable = t; } finally { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index aecd1084e6..93ae1ec304 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor; import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -98,8 +100,7 @@ @SuppressWarnings({"serial"}) public class TezTask extends Task { - private static final String CLASS_NAME = TezTask.class.getName(); - private static transient Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + private static Logger LOG = LoggerFactory.getLogger(TezTask.class); private final PerfLogger perfLogger = SessionState.getPerfLogger(); private static final String TEZ_MEMORY_RESERVE_FRACTION = "tez.task.scale.memory.reserve-fraction"; @@ -186,10 +187,10 @@ public int execute(DriverContext driverContext) { CallerContext callerContext = CallerContext.create( "HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr()); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION); + perfLogger.PerfLogBegin(TezTask.class.getName(), PerfLogger.TEZ_GET_SESSION); session = sessionRef.value = WorkloadManagerFederation.getSession( sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION); + perfLogger.PerfLogEnd(TezTask.class.getName(), PerfLogger.TEZ_GET_SESSION); try { ss.setTezSession(session); @@ -389,112 +390,120 @@ void checkOutputSpec(BaseWork work, JobConf jc) throws IOException { DAG build(JobConf conf, TezWork work, Path scratchDir, Context ctx, Map vertexResources) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG); + try (PerfTimer buildDagTimer = SessionState.getPerfTimer(TezTask.class, + PerfTimedAction.TEZ_BUILD_DAG)) { - // getAllWork returns a topologically sorted list, which we use to make - // sure that vertices are created before they are used in edges. - List ws = work.getAllWork(); - Collections.reverse(ws); + // getAllWork returns a topologically sorted list, which we use to make + // sure that vertices are created before they are used in edges. 
+ List ws = work.getAllWork(); + Collections.reverse(ws); - FileSystem fs = scratchDir.getFileSystem(conf); + FileSystem fs = scratchDir.getFileSystem(conf); - // the name of the dag is what is displayed in the AM/Job UI - String dagName = utils.createDagName(conf, queryPlan); + // the name of the dag is what is displayed in the AM/Job UI + String dagName = utils.createDagName(conf, queryPlan); - LOG.info("Dag name: " + dagName); - DAG dag = DAG.create(dagName); + LOG.info("Dag name: {}", dagName); + DAG dag = DAG.create(dagName); - // set some info for the query - JSONObject json = new JSONObject(new LinkedHashMap<>()).put("context", "Hive") - .put("description", ctx.getCmd()); - String dagInfo = json.toString(); + // set some info for the query + JSONObject json = new JSONObject(new LinkedHashMap<>()) + .put("context", "Hive").put("description", ctx.getCmd()); + String dagInfo = json.toString(); - if (LOG.isDebugEnabled()) { - LOG.debug("DagInfo: " + dagInfo); - } - dag.setDAGInfo(dagInfo); + LOG.debug("DagInfo: {}", dagInfo); - dag.setCredentials(conf.getCredentials()); - setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf); + dag.setDAGInfo(dagInfo); - for (BaseWork w: ws) { - boolean isFinal = work.getLeaves().contains(w); + dag.setCredentials(conf.getCredentials()); + setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf); - // translate work to vertex - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName()); + for (BaseWork w : ws) { + boolean isFinal = work.getLeaves().contains(w); - if (w instanceof UnionWork) { - // Special case for unions. These items translate to VertexGroups + // translate work to vertex + try (PerfTimer vertexTimer = SessionState.getPerfTimer(TezTask.class, + PerfTimedAction.TEZ_CREATE_VERTEX, w.getName())) { - List unionWorkItems = new LinkedList(); - List children = new LinkedList(); + if (w instanceof UnionWork) { + // Special case for unions. These items translate to VertexGroups - // split the children into vertices that make up the union and vertices that are - // proper children of the union - for (BaseWork v: work.getChildren(w)) { - EdgeType type = work.getEdgeProperty(w, v).getEdgeType(); - if (type == EdgeType.CONTAINS) { - unionWorkItems.add(v); - } else { - children.add(v); - } - } - JobConf parentConf = workToConf.get(unionWorkItems.get(0)); - checkOutputSpec(w, parentConf); + List unionWorkItems = new LinkedList(); + List children = new LinkedList(); - // create VertexGroup - Vertex[] vertexArray = new Vertex[unionWorkItems.size()]; + // split the children into vertices that make up the union and + // vertices that are proper children of the union + for (BaseWork v : work.getChildren(w)) { + EdgeType type = work.getEdgeProperty(w, v).getEdgeType(); + if (type == EdgeType.CONTAINS) { + unionWorkItems.add(v); + } else { + children.add(v); + } + } + JobConf parentConf = workToConf.get(unionWorkItems.get(0)); + checkOutputSpec(w, parentConf); - int i = 0; - for (BaseWork v: unionWorkItems) { - vertexArray[i++] = workToVertex.get(v); - } - VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray); + // create VertexGroup + Vertex[] vertexArray = new Vertex[unionWorkItems.size()]; - // For a vertex group, all Outputs use the same Key-class, Val-class and partitioner. - // Pick any one source vertex to figure out the Edge configuration. 
+ int i = 0; + for (BaseWork v : unionWorkItems) { + vertexArray[i++] = workToVertex.get(v); + } + VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray); - // now hook up the children - for (BaseWork v: children) { - // finally we can create the grouped edge - GroupInputEdge e = utils.createEdge(group, parentConf, - workToVertex.get(v), work.getEdgeProperty(w, v), v, work); + // For a vertex group, all Outputs use the same Key-class, Val-class + // and partitioner. + // Pick any one source vertex to figure out the Edge configuration. - dag.addEdge(e); - } - } else { - // Regular vertices - JobConf wxConf = utils.initializeVertexConf(conf, ctx, w); - checkOutputSpec(w, wxConf); - Vertex wx = utils.createVertex(wxConf, w, scratchDir, fs, ctx, !isFinal, - work, work.getVertexType(w), vertexResources); - if (w.getReservedMemoryMB() > 0) { - // If reversedMemoryMB is set, make memory allocation fraction adjustment as needed - double frac = DagUtils.adjustMemoryReserveFraction(w.getReservedMemoryMB(), super.conf); - LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac); - wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac)); - } // Otherwise just leave it up to Tez to decide how much memory to allocate - dag.addVertex(wx); - utils.addCredentials(w, dag); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName()); - workToVertex.put(w, wx); - workToConf.put(w, wxConf); - - // add all dependencies (i.e.: edges) to the graph - for (BaseWork v: work.getChildren(w)) { - assert workToVertex.containsKey(v); - Edge e = null; - - TezEdgeProperty edgeProp = work.getEdgeProperty(w, v); - e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, work); - dag.addEdge(e); + // now hook up the children + for (BaseWork v : children) { + // finally we can create the grouped edge + GroupInputEdge e = utils.createEdge(group, parentConf, + workToVertex.get(v), work.getEdgeProperty(w, v), v, work); + + dag.addEdge(e); + } + } else { + // Regular vertices + JobConf wxConf = utils.initializeVertexConf(conf, ctx, w); + checkOutputSpec(w, wxConf); + Vertex wx = utils.createVertex(wxConf, w, scratchDir, fs, ctx, + !isFinal, work, work.getVertexType(w), vertexResources); + if (w.getReservedMemoryMB() > 0) { + // If reversedMemoryMB is set, make memory allocation fraction + // adjustment as needed + double frac = DagUtils.adjustMemoryReserveFraction( + w.getReservedMemoryMB(), super.conf); + LOG.info( + "Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac); + wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac)); + } // Otherwise just leave it up to Tez to decide how much memory to + // allocate + dag.addVertex(wx); + utils.addCredentials(w, dag); + + workToVertex.put(w, wx); + workToConf.put(w, wxConf); + + // add all dependencies (i.e.: edges) to the graph + for (BaseWork v : work.getChildren(w)) { + assert workToVertex.containsKey(v); + Edge e = null; + + TezEdgeProperty edgeProp = work.getEdgeProperty(w, v); + e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, + work); + dag.addEdge(e); + } + } } } + // Clear the work map after build. TODO: remove caching instead? + Utilities.clearWorkMap(conf); + return dag; } - // Clear the work map after build. TODO: remove caching instead? 
- Utilities.clearWorkMap(conf); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG); - return dag; } private static void setAccessControlsForCurrentUser(DAG dag, String queryId, @@ -535,7 +544,7 @@ private TezSessionState getNewTezSessionOnError( } DAGClient submit(DAG dag, Ref sessionStateRef) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_DAG); + perfLogger.PerfLogBegin(TezTask.class.getName(), PerfLogger.TEZ_SUBMIT_DAG); DAGClient dagClient = null; TezSessionState sessionState = sessionStateRef.value; try { @@ -565,7 +574,7 @@ DAGClient submit(DAG dag, Ref sessionStateRef) throws Exception } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_DAG); + perfLogger.PerfLogEnd(TezTask.class.getName(), PerfLogger.TEZ_SUBMIT_DAG); return new SyncDagClient(dagClient); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java index 1f72477666..08a2e10f43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java @@ -46,7 +46,8 @@ import org.apache.hadoop.hive.ql.DriverState; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -75,8 +76,8 @@ public class CombineHiveInputFormat extends HiveInputFormat { - private static final String CLASS_NAME = CombineHiveInputFormat.class.getName(); - public static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + public static final Logger LOG = + LoggerFactory.getLogger(CombineHiveInputFormat.class); // max number of threads we can use to check non-combinable paths private static final int MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50; @@ -498,13 +499,19 @@ public int hashCode() { } } + /** * Create Hive splits based on CombineFileSplit. 
*/ @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); + try (PerfTimer splitTimer = SessionState.getPerfTimer(CombineHiveInputFormat.class, + PerfTimedAction.GET_SPLITS)) { + return doGetSplits(job, numSplits); + } + } + + public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException { init(job); ArrayList result = new ArrayList(); @@ -532,7 +539,6 @@ public int hashCode() { } } catch (Exception e) { LOG.error("Error checking non-combinable path", e); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS); throw new IOException(e); } } @@ -585,7 +591,6 @@ public int hashCode() { } LOG.info("Number of all splits " + result.size()); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS); return result.toArray(new InputSplit[result.size()]); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index c97c961481..f9055c6dfc 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -37,7 +37,8 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -89,8 +90,7 @@ */ public class HiveInputFormat implements InputFormat, JobConfigurable { - private static final String CLASS_NAME = HiveInputFormat.class.getName(); - private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + private static final Logger LOG = LoggerFactory.getLogger(HiveInputFormat.class); /** * A cache of InputFormat instances. @@ -672,8 +672,13 @@ private static void processForWriteIdsForMmRead(Path dir, Configuration conf, @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS); + try (PerfTimer splitTimer = SessionState.getPerfTimer(HiveInputFormat.class, + PerfTimedAction.GET_SPLITS)) { + return doGetSplits(job, numSplits); + } + } + + protected InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException { init(job); Path[] dirs = getInputPaths(job); JobConf newjob = new JobConf(job); @@ -770,7 +775,6 @@ private static void processForWriteIdsForMmRead(Path dir, Configuration conf, if (LOG.isInfoEnabled()) { LOG.info("number of splits " + result.size()); } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS); return result.toArray(new HiveInputSplit[result.size()]); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java b/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java index dd25f622c7..6a457a3730 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppender.java @@ -87,6 +87,7 @@ /* Patterns that are included in performance logging level. * In performance mode, show execution and performance logger messages. 
+ * TODO: This is very brittle. Users can supply a custom logger class */ private static final Pattern performanceIncludeNamePattern = Pattern.compile( executionIncludeNamePattern.pattern() + "|" + PerfLogger.class.getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 6143e85664..8d30c481a0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -180,6 +180,7 @@ import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.FunctionUtils; +import org.apache.hadoop.hive.ql.exec.MoveTask; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidUtils; @@ -187,7 +188,8 @@ import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.lockmgr.LockException; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMaterializationRule; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils; @@ -2184,47 +2186,51 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par boolean resetStatistics, Long writeId, int stmtId, boolean isInsertOverwrite) throws HiveException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_PARTITION); + try (PerfTimer compileTimer = SessionState.getPerfTimer(MoveTask.class, + PerfTimedAction.LOAD_PARTITION)) { - // Get the partition object if it already exists - Partition oldPart = getPartition(tbl, partSpec, false); - boolean isTxnTable = AcidUtils.isTransactionalTable(tbl); + // Get the partition object if it already exists + Partition oldPart = getPartition(tbl, partSpec, false); + boolean isTxnTable = AcidUtils.isTransactionalTable(tbl); - // If config is set, table is not temporary and partition being inserted exists, capture - // the list of files added. For not yet existing partitions (insert overwrite to new partition - // or dynamic partition inserts), the add partition event will capture the list of files added. - List newFiles = Collections.synchronizedList(new ArrayList<>()); + // If config is set, table is not temporary and partition being inserted + // exists, capture the list of files added. For not yet existing + // partitions (insert overwrite to new partition or dynamic partition + // inserts), the add partition event will capture the list of files added. 
+ List newFiles = Collections.synchronizedList(new ArrayList<>()); - Partition newTPart = loadPartitionInternal(loadPath, tbl, partSpec, oldPart, - loadFileType, inheritTableSpecs, - inheritLocation, isSkewedStoreAsSubdir, isSrcLocal, isAcidIUDoperation, - resetStatistics, writeId, stmtId, isInsertOverwrite, isTxnTable, newFiles); + Partition newTPart = + loadPartitionInternal(loadPath, tbl, partSpec, oldPart, loadFileType, + inheritTableSpecs, inheritLocation, isSkewedStoreAsSubdir, + isSrcLocal, isAcidIUDoperation, resetStatistics, writeId, stmtId, + isInsertOverwrite, isTxnTable, newFiles); - AcidUtils.TableSnapshot tableSnapshot = isTxnTable ? getTableSnapshot(tbl, writeId) : null; - if (tableSnapshot != null) { - newTPart.getTPartition().setWriteId(tableSnapshot.getWriteId()); - } - - if (oldPart == null) { - addPartitionToMetastore(newTPart, resetStatistics, tbl, tableSnapshot); - // For acid table, add the acid_write event with file list at the time of load itself. But - // it should be done after partition is created. - if (isTxnTable && (null != newFiles)) { - addWriteNotificationLog(tbl, partSpec, newFiles, writeId); - } - } else { - try { - setStatsPropAndAlterPartition(resetStatistics, tbl, newTPart, tableSnapshot); - } catch (TException e) { - LOG.error(StringUtils.stringifyException(e)); - throw new HiveException(e); + AcidUtils.TableSnapshot tableSnapshot = + isTxnTable ? getTableSnapshot(tbl, writeId) : null; + if (tableSnapshot != null) { + newTPart.getTPartition().setWriteId(tableSnapshot.getWriteId()); } - } - perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_PARTITION); + if (oldPart == null) { + addPartitionToMetastore(newTPart, resetStatistics, tbl, tableSnapshot); + // For acid table, add the acid_write event with file list at the time + // of load itself. But + // it should be done after partition is created. + if (isTxnTable && (null != newFiles)) { + addWriteNotificationLog(tbl, partSpec, newFiles, writeId); + } + } else { + try { + setStatsPropAndAlterPartition(resetStatistics, tbl, newTPart, + tableSnapshot); + } catch (TException e) { + LOG.error(StringUtils.stringifyException(e)); + throw new HiveException(e); + } + } - return newTPart; + return newTPart; + } } /** @@ -2276,8 +2282,6 @@ private Partition loadPartitionInternal(Path loadPath, Table tbl, Map 0, isFullAcidTable, isManaged); } } - perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); + moveTimer.close(); Partition newTPart = oldPart != null ? 
oldPart : new Partition(tbl, partSpec, newPartPath); alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString()); validatePartition(newTPart); @@ -2826,8 +2831,8 @@ private void constructOneLBLocationMap(FileStatus fSta, final boolean resetStatistics, final AcidUtils.Operation operation, boolean isInsertOverwrite) throws HiveException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS); + PerfTimer moveTimer = SessionState.getPerfTimer(MoveTask.class, + PerfTimedAction.LOAD_DYNAMIC_PARTITIONS); // Get all valid partition paths and existing partitions for them (if any) final Table tbl = getTable(tableName); @@ -3018,7 +3023,7 @@ private void constructOneLBLocationMap(FileStatus fSta, } LOG.info("Loaded " + result.size() + "partitionsToAdd"); - perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS); + moveTimer.close(); return result; } catch (TException te) { @@ -3054,8 +3059,8 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType boolean isSkewedStoreAsSubdir, boolean isAcidIUDoperation, boolean resetStatistics, Long writeId, int stmtId, boolean isInsertOverwrite) throws HiveException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_TABLE); + PerfTimer loadTimer = SessionState.getPerfTimer(MoveTask.class, + PerfTimedAction.LOAD_TABLE); List newFiles = null; Table tbl = getTable(tableName); @@ -3100,8 +3105,6 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType Utilities.FILE_OP_LOGGER.debug("moving " + loadPath + " to " + tblPath + " (replace = " + loadFileType + ")"); - perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES); - boolean isManaged = tbl.getTableType() == TableType.MANAGED_TABLE; if (loadFileType == LoadFileType.REPLACE_ALL && !isTxnTable) { @@ -3121,7 +3124,7 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType throw new HiveException("addFiles: filesystem error in check phase", e); } } - perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); + loadTimer.close(); } if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { LOG.debug("setting table statistics false for " + tbl.getDbName() + "." 
+ tbl.getTableName()); @@ -3163,7 +3166,7 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType fireInsertEvent(tbl, null, (loadFileType == LoadFileType.REPLACE_ALL), newFiles); } - perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_TABLE); + loadTimer.close(); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 4592f5ec34..b0ed9d4735 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -57,7 +57,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -89,7 +90,6 @@ public final class HiveMaterializedViewsRegistry { private static final Logger LOG = LoggerFactory.getLogger(HiveMaterializedViewsRegistry.class); - private static final String CLASS_NAME = HiveMaterializedViewsRegistry.class.getName(); /* Singleton */ private static final HiveMaterializedViewsRegistry SINGLETON = new HiveMaterializedViewsRegistry(); @@ -166,9 +166,9 @@ public void run() { SessionState ss = new SessionState(db.getConf()); ss.setIsHiveServerQuery(true); // All is served from HS2, we do not need e.g. Tez sessions SessionState.start(ss); - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH); - try { + try (PerfTimer compileTimer = + SessionState.getPerfTimer(HiveMaterializedViewsRegistry.class, + PerfTimedAction.MATERIALIZED_VIEWS_REGISTRY_REFRESH)) { if (initialized.get()) { for (Table mvTable : db.getAllMaterializedViewObjectsForRewriting()) { RelOptMaterialization existingMV = getRewritingMaterializedView(mvTable.getDbName(), mvTable.getTableName()); @@ -200,7 +200,6 @@ public void run() { LOG.error("Problem connecting to the metastore when initializing the view registry", e); } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 25e9cd0482..ed15b502ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -52,7 +52,7 @@ public class Optimizer { private ParseContext pctx; private List transformations; - private static final Logger LOG = LoggerFactory.getLogger(Optimizer.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(Optimizer.class); /** * Create the list of transformations. 
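The conversion applied throughout these hunks replaces a paired PerfLogBegin/PerfLogEnd call with a PerfTimer obtained from SessionState and scoped by try-with-resources, so the end of the measured interval is recorded even when the guarded block throws. A minimal before/after sketch in the style of the HiveMaterializedViewsRegistry hunk above; it assumes PerfTimer implements AutoCloseable, and refreshRegistry() is a hypothetical stand-in for the timed work:

// Before: paired begin/end calls; the end event had to be emitted on every exit path.
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(HiveMaterializedViewsRegistry.class.getName(),
    PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH);
refreshRegistry();
perfLogger.PerfLogEnd(HiveMaterializedViewsRegistry.class.getName(),
    PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH);

// After: a scoped timer; closing it ends the interval on both normal and exceptional exit.
try (PerfTimer refreshTimer = SessionState.getPerfTimer(
    HiveMaterializedViewsRegistry.class,
    PerfTimedAction.MATERIALIZED_VIEWS_REGISTRY_REFRESH)) {
  refreshRegistry();
}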
@@ -243,7 +243,8 @@ public ParseContext optimize() throws SemanticException { for (Transform t : transformations) { t.beginPerfLogging(); pctx = t.transform(pctx); - t.endPerfLogging(t.toString()); + t.endPerfLogging(); + LOG.debug("Transform: {}", t); } return pctx; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java index 6c57797177..407fd02f1f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java @@ -18,7 +18,8 @@ package org.apache.hadoop.hive.ql.optimizer; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.session.SessionState; @@ -30,27 +31,25 @@ * perform all the optimizations, and then return the updated parse context. */ public abstract class Transform { + + private PerfTimer perfTimer; + /** * All transformation steps implement this interface. - * - * @param pctx - * input parse context + * + * @param pctx input parse context * @return ParseContext * @throws SemanticException */ - public abstract ParseContext transform(ParseContext pctx) throws SemanticException; - + public abstract ParseContext transform(ParseContext pctx) + throws SemanticException; + public void beginPerfLogging() { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + this.perfTimer = + SessionState.getPerfTimer(this.getClass(), PerfTimedAction.OPTIMIZER); } public void endPerfLogging() { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER); + this.perfTimer.close(); } - public void endPerfLogging(String additionalInfo) { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, additionalInfo); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 673d8580d5..63afab72e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -37,7 +37,8 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -75,9 +76,7 @@ */ public class PartitionPruner extends Transform { - // The log - public static final String CLASS_NAME = PartitionPruner.class.getName(); - public static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + public static final Logger LOG = LoggerFactory.getLogger(PartitionPruner.class); /* * (non-Javadoc) @@ -447,18 +446,15 @@ private static PrunedPartitionList getPartitionsFromServer(Table tab, final Stri // Now filter. 
List partitions = new ArrayList(); boolean hasUnknownPartitions = false; - PerfLogger perfLogger = SessionState.getPerfLogger(); if (!doEvalClientSide) { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); - try { + try (PerfTimer runJobTimer = SessionState.getPerfTimer(PartitionPruner.class, + PerfTimedAction.PARTITION_RETRIEVING)) { hasUnknownPartitions = Hive.get().getPartitionsByExpr( tab, compactExpr, conf, partitions); } catch (IMetaStoreClient.IncompatibleMetastoreException ime) { // TODO: backward compat for Hive <= 0.12. Can be removed later. LOG.warn("Metastore doesn't support getPartitionsByExpr", ime); doEvalClientSide = true; - } finally { - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); } } if (doEvalClientSide) { @@ -479,12 +475,13 @@ private static PrunedPartitionList getPartitionsFromServer(Table tab, final Stri } } - private static Set getAllPartitions(Table tab) throws HiveException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); - Set result = Hive.get().getAllPartitionsOf(tab); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); - return result; + private static Set getAllPartitions(Table tab) + throws HiveException { + try ( + PerfTimer runJobTimer = SessionState.getPerfTimer(PartitionPruner.class, + PerfTimedAction.PARTITION_RETRIEVING)) { + return Hive.get().getAllPartitionsOf(tab); + } } /** @@ -496,27 +493,36 @@ private static PrunedPartitionList getPartitionsFromServer(Table tab, final Stri * @param conf Hive Configuration object, can not be NULL. * @return true iff the partition pruning expression contains non-partition columns. */ - static private boolean pruneBySequentialScan(Table tab, List partitions, - ExprNodeGenericFuncDesc prunerExpr, HiveConf conf) throws HiveException, MetaException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING); + static private boolean pruneBySequentialScan(Table tab, + List partitions, ExprNodeGenericFuncDesc prunerExpr, + HiveConf conf) throws HiveException, MetaException { + + final boolean hasUnknownPartitions; + final List partNames; + + try (PerfTimer runJobTimer = SessionState + .getPerfTimer(PartitionPruner.class, PerfTimedAction.PRUNE_LISTING)) { - List partNames = Hive.get().getPartitionNames( - tab.getDbName(), tab.getTableName(), (short) -1); + partNames = Hive.get().getPartitionNames(tab.getDbName(), + tab.getTableName(), (short) -1); - String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME); - List partCols = extractPartColNames(tab); - List partColTypeInfos = extractPartColTypes(tab); + String defaultPartitionName = + conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME); + List partCols = extractPartColNames(tab); + List partColTypeInfos = extractPartColTypes(tab); - boolean hasUnknownPartitions = prunePartitionNames( - partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING); + hasUnknownPartitions = prunePartitionNames(partCols, partColTypeInfos, + prunerExpr, defaultPartitionName, partNames); + } + + try ( + PerfTimer runJobTimer = SessionState.getPerfTimer(PartitionPruner.class, + PerfTimedAction.PARTITION_RETRIEVING)) { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); - if (!partNames.isEmpty()) { - partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames)); + if 
(!partNames.isEmpty()) { + partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames)); + } } - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); return hasUnknownPartitions; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 91ec00b9c6..48cd75ca2b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -133,7 +133,8 @@ import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; @@ -1759,10 +1760,10 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu this.cluster = optCluster; this.relOptSchema = relOptSchema; - PerfLogger perfLogger = SessionState.getPerfLogger(); // 1. Gen Calcite Plan - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - try { + try (PerfTimer compileTimer = SessionState.getPerfTimer( + CalcitePlanner.class, PerfTimedAction.OPTIMIZER, + "Calcite: Plan generation")) { calciteGenPlan = genLogicalPlan(getQB(), true, null, null); // if it is to create view, we do not use table alias resultSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(calciteGenPlan), @@ -1772,7 +1773,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu semanticException = e; throw new RuntimeException(e); } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation"); // Create executor RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster); @@ -1873,8 +1873,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // Partition Pruning; otherwise Expression evaluation may try to execute // corelated sub query. 
- PerfLogger perfLogger = SessionState.getPerfLogger(); - final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT); final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); @@ -2009,10 +2007,12 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv } // Trigger program - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation"); + try (PerfTimer optimizerTimer = SessionState.getPerfTimer( + CalcitePlanner.class, PerfTimedAction.OPTIMIZER, + "Calcite: Prejoin ordering transformation")) { + basePlan = executeProgram(basePlan, program.build(), mdProvider, + executorProvider); + } return basePlan; } @@ -2020,7 +2020,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) { final RelOptCluster optCluster = basePlan.getCluster(); - final PerfLogger perfLogger = SessionState.getPerfLogger(); final boolean useMaterializedViewsRegistry = !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname) .equals("DUMMY"); @@ -2102,51 +2101,58 @@ private RelNode copyNodeScan(RelNode scan) { return calcitePreMVRewritingPlan; } - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - - if (mvRebuild) { - // If it is a materialized view rebuild, we use the HepPlanner, since we only have - // one MV and we would like to use it to create incremental maintenance plans - final HepProgramBuilder program = new HepProgramBuilder(); - generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, - HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES); - // Add materialization for rebuild to planner - assert materializations.size() == 1; - // Optimize plan - basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider, materializations); - } else { - // If this is not a rebuild, we use Volcano planner as the decision - // on whether to use MVs or not and which MVs to use should be cost-based - optCluster.invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + CalcitePlanner.class, PerfTimedAction.OPTIMIZER, + "Calcite: View-based rewriting")) { - // Add materializations to planner - for (RelOptMaterialization materialization : materializations) { - planner.addMaterialization(materialization); - } - // Add rule to split aggregate with grouping sets (if any) - planner.addRule(HiveAggregateSplitRule.INSTANCE); - // Add view-based rewriting rules to planner - for (RelOptRule rule : HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES) { - planner.addRule(rule); + if (mvRebuild) { + // If it is a materialized view rebuild, we use the HepPlanner, since + // we only have + // one MV and we would like to use it to create incremental + // maintenance plans + final HepProgramBuilder program = new HepProgramBuilder(); + generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, + HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES); + // Add materialization for rebuild to planner + assert materializations.size() == 1; + // Optimize plan + basePlan = 
executeProgram(basePlan, program.build(), mdProvider, + executorProvider, materializations); + } else { + // If this is not a rebuild, we use Volcano planner as the decision + // on whether to use MVs or not and which MVs to use should be + // cost-based + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS + .set(JaninoRelMetadataProvider.DEFAULT); + + // Add materializations to planner + for (RelOptMaterialization materialization : materializations) { + planner.addMaterialization(materialization); + } + // Add rule to split aggregate with grouping sets (if any) + planner.addRule(HiveAggregateSplitRule.INSTANCE); + // Add view-based rewriting rules to planner + for (RelOptRule rule : HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES) { + planner.addRule(rule); + } + // Partition pruner rule + planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE); + planner.addRule(new HivePartitionPruneRule(conf)); + + // Optimize plan + planner.setRoot(basePlan); + basePlan = planner.findBestExp(); + // Remove view-based rewriting rules from planner + planner.clear(); + + // Restore default cost model + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS + .set(JaninoRelMetadataProvider.of(mdProvider)); } - // Partition pruner rule - planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE); - planner.addRule(new HivePartitionPruneRule(conf)); - - // Optimize plan - planner.setRoot(basePlan); - basePlan = planner.findBestExp(); - // Remove view-based rewriting rules from planner - planner.clear(); - - // Restore default cost model - optCluster.invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider)); } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting"); - List materializedViewsUsedOriginalPlan = getMaterializedViewsUsed(calcitePreMVRewritingPlan); List
materializedViewsUsedAfterRewrite = getMaterializedViewsUsed(basePlan); if (materializedViewsUsedOriginalPlan.size() == materializedViewsUsedAfterRewrite.size()) { @@ -2225,7 +2231,6 @@ private RelNode copyNodeScan(RelNode scan) { * @return */ private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) { - PerfLogger perfLogger = SessionState.getPerfLogger(); final HepProgramBuilder program = new HepProgramBuilder(); // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin @@ -2236,13 +2241,16 @@ private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider generatePartialProgram(program, false, HepMatchOrder.BOTTOM_UP, new JoinToMultiJoinRule(HiveJoin.class), new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER)); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); RelNode calciteOptimizedPlan; - try { - calciteOptimizedPlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider); + try (PerfTimer compileTimer = SessionState.getPerfTimer( + CalcitePlanner.class, PerfTimedAction.OPTIMIZER, + "Calcite: Join Reordering")) { + calciteOptimizedPlan = executeProgram(basePlan, program.build(), + mdProvider, executorProvider); } catch (Exception e) { if (noColsMissingStats.get() > 0) { - LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO"); + LOG.warn( + "Missing column stats (see previous messages), skipping join reordering in CBO"); noColsMissingStats.set(0); calciteOptimizedPlan = basePlan; disableSemJoinReordering = false; @@ -2250,8 +2258,6 @@ private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider throw e; } } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering"); - return calciteOptimizedPlan; } @@ -2267,7 +2273,6 @@ private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider * @return */ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) { - PerfLogger perfLogger = SessionState.getPerfLogger(); final HepProgramBuilder program = new HepProgramBuilder(); @@ -2373,11 +2378,12 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv } // Trigger program - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Postjoin ordering transformation"); - + try (PerfTimer compileTimer = SessionState.getPerfTimer( + CalcitePlanner.class, PerfTimedAction.OPTIMIZER, + "Calcite: Postjoin ordering transformation")) { + basePlan = executeProgram(basePlan, program.build(), mdProvider, + executorProvider); + } return basePlan; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 7d5807720b..581c7bbc02 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -19,7 +19,6 @@ import com.google.common.collect.ListMultimap; import com.google.common.collect.Sets; -import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -33,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; 
+import java.util.Optional; import java.util.Set; import java.util.SortedSet; import java.util.Stack; @@ -79,7 +79,8 @@ import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; +import org.apache.hadoop.hive.ql.log.PerfTimer; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; @@ -163,68 +164,81 @@ public void init(QueryState queryState, LogHelper console, Hive db) { @Override protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, Set outputs) throws SemanticException { - PerfLogger perfLogger = SessionState.getPerfLogger(); // Create the context for the walker OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx, inputs, outputs); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - runTopNKeyOptimization(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run top n key optimization"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "Run top n key optimization")) { + runTopNKeyOptimization(procCtx); + } - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // setup dynamic partition pruning where possible - runDynamicPartitionPruning(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Setup dynamic partition pruning"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "Setup dynamic partition pruning")) { + // setup dynamic partition pruning where possible + runDynamicPartitionPruning(procCtx, inputs, outputs); + } // need to run this; to get consistent filterop conditions(for operator tree matching) if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTCONSTANTPROPAGATION)) { new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext); } - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // setup stats in the operator plan - runStatsAnnotation(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Setup stats in the operator plan"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "Setup stats in the operator plan")) { + // setup stats in the operator plan + runStatsAnnotation(procCtx); + } // run Sorted dynamic partition optimization if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONING) && HiveConf.getVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") && !HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - new SortedDynPartitionOptimizer().transform(procCtx.parseContext); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Sorted dynamic partition optimization")) { + new SortedDynPartitionOptimizer().transform(procCtx.parseContext); + } } - if(HiveConf.getBoolVar(procCtx.conf, 
HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) + if (HiveConf.getBoolVar(procCtx.conf, + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) || procCtx.parseContext.hasAcidWrite()) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Dynamic sort partition adds an extra RS therefore need to de-dup - new ReduceSinkDeDuplication().transform(procCtx.parseContext); - // there is an issue with dedup logic wherein SELECT is created with wrong columns - // NonBlockingOpDeDupProc fixes that - // (kind of hackish, the issue in de-dup should be fixed but it needs more investigation) - new NonBlockingOpDeDupProc().transform(procCtx.parseContext); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Reduce Sink de-duplication"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Reduce Sink de-duplication")) { + // Dynamic sort partition adds an extra RS therefore need to de-dup + new ReduceSinkDeDuplication().transform(procCtx.parseContext); + // there is an issue with dedup logic wherein SELECT is created with + // wrong columns NonBlockingOpDeDupProc fixes that (kind of hackish, the + // issue in de-dup should be fixed but it needs more investigation) + new NonBlockingOpDeDupProc().transform(procCtx.parseContext); + } } - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // run the optimizations that use stats for optimization - runStatsDependentOptimizations(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Run the optimizations that use stats for optimization")) { + // run the optimizations that use stats for optimization + runStatsDependentOptimizations(procCtx, inputs, outputs); + } - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if(procCtx.conf.getBoolVar(ConfVars.HIVEOPTJOINREDUCEDEDUPLICATION)) { - new ReduceSinkJoinDeDuplication().transform(procCtx.parseContext); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Run reduce sink after join algorithm selection")) { + if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTJOINREDUCEDEDUPLICATION)) { + new ReduceSinkJoinDeDuplication().transform(procCtx.parseContext); + } } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection"); semijoinRemovalBasedTransformations(procCtx, inputs, outputs); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if(procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) { - new SharedWorkOptimizer().transform(procCtx.parseContext); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "Shared scans optimization")) { + if (procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) { + new SharedWorkOptimizer().transform(procCtx.parseContext); + } } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Shared scans optimization"); + // need a new run of the constant folding because we might have created lots // of "and true and true" conditions. 
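Each TezCompiler phase above keeps the single TEZ_COMPILER action but passes a distinct description to the three-argument getPerfTimer overload, which takes over the role of the extra text previously supplied to PerfLogEnd. A sketch of one more phase written in the same style; the label and the body are illustrative only and reuse names already visible in this file:

try (PerfTimer phaseTimer = SessionState.getPerfTimer(TezCompiler.class,
    PerfTimedAction.TEZ_COMPILER, "Example: re-run constant folding")) {
  // Hypothetical phase body, mirroring the constant propagation call above.
  if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
    new ConstantPropagate(ConstantPropagateOption.SHORTCUT)
        .transform(procCtx.parseContext);
  }
}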
@@ -234,10 +248,12 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext); } - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - AuxOpTreeSignature.linkAuxSignatures(procCtx.parseContext); - markOperatorsWithUnstableRuntimeStats(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "markOperatorsWithUnstableRuntimeStats"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "markOperatorsWithUnstableRuntimeStats")) { + AuxOpTreeSignature.linkAuxSignatures(procCtx.parseContext); + markOperatorsWithUnstableRuntimeStats(procCtx); + } // ATTENTION : DO NOT, I REPEAT, DO NOT WRITE ANYTHING AFTER updateBucketingVersionForUpgrade() // ANYTHING WHICH NEEDS TO BE ADDED MUST BE ADDED ABOVE @@ -483,7 +499,6 @@ private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx, private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, Set inputs, Set outputs) throws SemanticException { - PerfLogger perfLogger = SessionState.getPerfLogger(); final boolean dynamicPartitionPruningEnabled = procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING); @@ -493,58 +508,72 @@ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled && procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (dynamicPartitionPruningEnabled) { - runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "Run remove dynamic pruning by size")) { + if (dynamicPartitionPruningEnabled) { + runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); + } } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); if (semiJoinReductionEnabled) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - markSemiJoinForDPP(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); - - // Remove any semi join edges from Union Op - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemiJoinEdgesForUnion(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, - "Remove any semi join edge between Union and RS"); - - // Remove any parallel edge between semijoin and mapjoin. - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemijoinsParallelToMapJoin(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin"); - - // Remove semijoin optimization if SMB join is created. 
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemijoinOptimizationFromSMBJoins(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); - - // Remove bloomfilter if no stats generated - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemiJoinIfNoStats(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); - - // Removing semijoin optimization when it may not be beneficial - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemijoinOptimizationByBenefit(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); - } - - // after the stats phase we might have some cyclic dependencies that we need - // to take care of. - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (dynamicPartitionPruningEnabled) { - runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); - - // remove redundant dpp and semijoins - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - if (extendedReductionEnabled) { - removeRedundantSemijoinAndDpp(procCtx); - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove redundant semijoin reduction"); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Mark certain semijoin edges important based")) { + markSemiJoinForDPP(procCtx); + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Remove any semi join edge between Union and RS")) { + // Remove any semi join edges from Union Op + removeSemiJoinEdgesForUnion(procCtx); + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Remove any parallel edge between semijoin and mapjoin")) { + removeSemijoinsParallelToMapJoin(procCtx); + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Remove semijoin optimizations if needed")) { + // Remove semijoin optimization if SMB join is created. + removeSemijoinOptimizationFromSMBJoins(procCtx); + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Remove bloom filter optimizations if needed")) { + // Remove bloomfilter if no stats generated + removeSemiJoinIfNoStats(procCtx); + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Remove Semijoins based on cost benefits")) { + // Removing semijoin optimization when it may not be beneficial + removeSemijoinOptimizationByBenefit(procCtx); + } + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, + "Run cycle analysis for partition pruning")) { + // after the stats phase we might have some cyclic dependencies that we + // need to take care of. 
+ if (dynamicPartitionPruningEnabled) { + runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); + } + } + + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "Remove redundant semijoin reduction")) { + // remove redundant dpp and semijoins + if (extendedReductionEnabled) { + removeRedundantSemijoinAndDpp(procCtx); + } + } } private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx, @@ -598,11 +627,18 @@ private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx, Set> rootTasks, ParseContext pCtx, + List> mvTask, Set inputs, + Set outputs) throws SemanticException { + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "generateTaskTree")) { + doGenerateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs); + } + } + + private void doGenerateTaskTree(List> rootTasks, ParseContext pCtx, List> mvTask, Set inputs, Set outputs) throws SemanticException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); ParseContext tempParseContext = getParseContext(pCtx, rootTasks); GenTezUtils utils = new GenTezUtils(); GenTezWork genTezWork = new GenTezWork(utils); @@ -686,7 +722,6 @@ protected void generateTaskTree(List> rootTasks, ParseContext pCtx, LOG.debug("Handling AppMasterEventOperator: " + event); GenTezUtils.processAppMasterEvent(procCtx, event); } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree"); } @Override @@ -743,12 +778,17 @@ protected void decideExecMode(List> rootTasks, Context ctx, // currently all Tez work is on the cluster return; } - @Override protected void optimizeTaskPlan(List> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException { - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + try (PerfTimer compileTimer = SessionState.getPerfTimer(TezCompiler.class, + PerfTimedAction.TEZ_COMPILER, "optimizeTaskPlan")) { + doOptimizeTaskPlan(rootTasks, pCtx, ctx); + } + } + + private void doOptimizeTaskPlan(List> rootTasks, ParseContext pCtx, + Context ctx) throws SemanticException { PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask()); @@ -809,9 +849,6 @@ protected void optimizeTaskPlan(List> rootTasks, ParseContext pCtx, if (physicalCtx.getContext().getExplainAnalyze() != null) { new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx); } - - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan"); - return; } private static class SMBJoinOpProcContext implements NodeProcessorCtx { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index 24429b4a1f..d1221f57a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.ql.parse.spark; -import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -60,7 +59,8 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.lib.TypeRule; -import org.apache.hadoop.hive.ql.log.PerfLogger; +import 
org.apache.hadoop.hive.ql.log.PerfTimer; +import org.apache.hadoop.hive.ql.log.PerfTimedAction; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; @@ -101,8 +101,6 @@ * Cloned from TezCompiler. */ public class SparkCompiler extends TaskCompiler { - private static final String CLASS_NAME = SparkCompiler.class.getName(); - private static final PerfLogger PERF_LOGGER = SessionState.getPerfLogger(); public SparkCompiler() { } @@ -110,54 +108,62 @@ public SparkCompiler() { @Override protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, Set outputs) throws SemanticException { - PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_OPERATOR_TREE); - OptimizeSparkProcContext procCtx = new OptimizeSparkProcContext(conf, pCtx, inputs, outputs); + try (PerfTimer runJobTimer = SessionState.getPerfTimer(SparkCompiler.class, + PerfTimedAction.SPARK_OPTIMIZE_OPERATOR_TREE)) { - // Run Spark Dynamic Partition Pruning - runDynamicPartitionPruning(procCtx); + OptimizeSparkProcContext procCtx = + new OptimizeSparkProcContext(conf, pCtx, inputs, outputs); - // Annotation OP tree with statistics - runStatsAnnotation(procCtx); + // Run Spark Dynamic Partition Pruning + runDynamicPartitionPruning(procCtx); - // Run Dynamic Partitioning sort Optimization. - runDynPartitionSortOptimizations(procCtx); + // Annotation OP tree with statistics + runStatsAnnotation(procCtx); - // Set reducer parallelism - runSetReducerParallelism(procCtx); + // Run Dynamic Partitioning sort Optimization. + runDynPartitionSortOptimizations(procCtx); - // Run Join releated optimizations - runJoinOptimizations(procCtx); + // Set reducer parallelism + runSetReducerParallelism(procCtx); - if(conf.isSparkDPPAny()){ - // Remove DPP based on expected size of the output data - runRemoveDynamicPruning(procCtx); + // Run Join releated optimizations + runJoinOptimizations(procCtx); - // Remove cyclic dependencies for DPP - runCycleAnalysisForPartitionPruning(procCtx); + if (conf.isSparkDPPAny()) { + // Remove DPP based on expected size of the output data + runRemoveDynamicPruning(procCtx); - // Remove nested DPPs - SparkUtilities.removeNestedDPP(procCtx); - } + // Remove cyclic dependencies for DPP + runCycleAnalysisForPartitionPruning(procCtx); - // Re-run constant propagation so we fold any new constants introduced by the operator optimizers - // Specifically necessary for DPP because we might have created lots of "and true and true" conditions - if (procCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - new ConstantPropagate(ConstantPropagateProcCtx.ConstantPropagateOption.SHORTCUT).transform(pCtx); - } + // Remove nested DPPs + SparkUtilities.removeNestedDPP(procCtx); + } - // ATTENTION : DO NOT, I REPEAT, DO NOT WRITE ANYTHING AFTER updateBucketingVersionForUpgrade() - // ANYTHING WHICH NEEDS TO BE ADDED MUST BE ADDED ABOVE - // This call updates the bucketing version of final ReduceSinkOp based on - // the bucketing version of FileSinkOp. This operation must happen at the - // end to ensure there is no further rewrite of plan which may end up - // removing/updating the ReduceSinkOp as was the case with SortedDynPartitionOptimizer - // Update bucketing version of ReduceSinkOp if needed - // Note: This has been copied here from TezCompiler, change seems needed for bucketing to work - // properly moving forward. 
- updateBucketingVersionForUpgrade(procCtx); - - PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_OPERATOR_TREE); + // Re-run constant propagation so we fold any new constants introduced by + // the operator optimizers Specifically necessary for DPP because we + // might have created lots of "and true and true" conditions + if (procCtx.getConf() + .getBoolVar(HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + new ConstantPropagate( + ConstantPropagateProcCtx.ConstantPropagateOption.SHORTCUT) + .transform(pCtx); + } + + // ATTENTION : DO NOT, I REPEAT, DO NOT WRITE ANYTHING AFTER + // updateBucketingVersionForUpgrade() + // ANYTHING WHICH NEEDS TO BE ADDED MUST BE ADDED ABOVE + // This call updates the bucketing version of final ReduceSinkOp based on + // the bucketing version of FileSinkOp. This operation must happen at the + // end to ensure there is no further rewrite of plan which may end up + // removing/updating the ReduceSinkOp as was the case with + // SortedDynPartitionOptimizer + // Update bucketing version of ReduceSinkOp if needed + // Note: This has been copied here from TezCompiler, change seems needed + // for bucketing to work properly moving forward. + updateBucketingVersionForUpgrade(procCtx); + } } private void runRemoveDynamicPruning(OptimizeSparkProcContext procCtx) throws SemanticException { @@ -364,76 +370,84 @@ private void runDynPartitionSortOptimizations(OptimizeSparkProcContext procCtx) protected void generateTaskTree(List> rootTasks, ParseContext pCtx, List> mvTask, Set inputs, Set outputs) throws SemanticException { - PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE); - - GenSparkUtils utils = GenSparkUtils.getUtils(); - utils.resetSequenceNumber(); - - ParseContext tempParseContext = getParseContext(pCtx, rootTasks); - GenSparkProcContext procCtx = new GenSparkProcContext( - conf, tempParseContext, mvTask, rootTasks, inputs, outputs, pCtx.getTopOps()); - - // -------------------------------- First Pass ---------------------------------- // - // Identify SparkPartitionPruningSinkOperators, and break OP tree if necessary - - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("Clone OP tree for PartitionPruningSink", - SparkPartitionPruningSinkOperator.getOperatorName() + "%"), - new SplitOpTreeForDPP()); - - Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); - GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx); - - List topNodes = new ArrayList(); - topNodes.addAll(pCtx.getTopOps().values()); - ogw.startWalking(topNodes, null); - - // -------------------------------- Second Pass ---------------------------------- // - // Process operator tree in two steps: first we process the extra op trees generated - // in the first pass. Then we process the main op tree, and the result task will depend - // on the task generated in the first pass. - topNodes.clear(); - topNodes.addAll(procCtx.topOps.values()); - generateTaskTreeHelper(procCtx, topNodes); - - // If this set is not empty, it means we need to generate a separate task for collecting - // the partitions used. 
- if (!procCtx.clonedPruningTableScanSet.isEmpty()) { - SparkTask pruningTask = SparkUtilities.createSparkTask(conf); - SparkTask mainTask = procCtx.currentTask; - pruningTask.addDependentTask(procCtx.currentTask); - procCtx.rootTasks.remove(procCtx.currentTask); - procCtx.rootTasks.add(pruningTask); - procCtx.currentTask = pruningTask; - + try (PerfTimer runJobTimer = SessionState.getPerfTimer(SparkCompiler.class, + PerfTimedAction.SPARK_GENERATE_TASK_TREE)) { + + GenSparkUtils utils = GenSparkUtils.getUtils(); + utils.resetSequenceNumber(); + + ParseContext tempParseContext = getParseContext(pCtx, rootTasks); + GenSparkProcContext procCtx = + new GenSparkProcContext(conf, tempParseContext, mvTask, rootTasks, + inputs, outputs, pCtx.getTopOps()); + + // ------------ First Pass ------------------- // + // Identify SparkPartitionPruningSinkOperators, and break OP tree if + // necessary + + Map opRules = + new LinkedHashMap(); + opRules.put( + new RuleRegExp("Clone OP tree for PartitionPruningSink", + SparkPartitionPruningSinkOperator.getOperatorName() + "%"), + new SplitOpTreeForDPP()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); + GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx); + + List topNodes = new ArrayList(); + topNodes.addAll(pCtx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + // ------------------ Second Pass ------------------// + // Process operator tree in two steps: first we process the extra op + // trees generated in the first pass. Then we process the main op tree, + // and the result task will depend on the task generated in the first pass. topNodes.clear(); - topNodes.addAll(procCtx.clonedPruningTableScanSet); + topNodes.addAll(procCtx.topOps.values()); generateTaskTreeHelper(procCtx, topNodes); - procCtx.currentTask = mainTask; - } + // If this set is not empty, it means we need to generate a separate task + // for collecting + // the partitions used. + if (!procCtx.clonedPruningTableScanSet.isEmpty()) { + SparkTask pruningTask = SparkUtilities.createSparkTask(conf); + SparkTask mainTask = procCtx.currentTask; + pruningTask.addDependentTask(procCtx.currentTask); + procCtx.rootTasks.remove(procCtx.currentTask); + procCtx.rootTasks.add(pruningTask); + procCtx.currentTask = pruningTask; + + topNodes.clear(); + topNodes.addAll(procCtx.clonedPruningTableScanSet); + generateTaskTreeHelper(procCtx, topNodes); + + procCtx.currentTask = mainTask; + } - // -------------------------------- Post Pass ---------------------------------- // + // -------------------------------- Post Pass + // ---------------------------------- // - // we need to clone some operator plans and remove union operators still - for (BaseWork w : procCtx.workWithUnionOperators) { - GenSparkUtils.getUtils().removeUnionOperators(procCtx, w); - } + // we need to clone some operator plans and remove union operators still + for (BaseWork w : procCtx.workWithUnionOperators) { + GenSparkUtils.getUtils().removeUnionOperators(procCtx, w); + } - // we need to fill MapWork with 'local' work and bucket information for SMB Join. - GenSparkUtils.getUtils().annotateMapWork(procCtx); + // we need to fill MapWork with 'local' work and bucket information for + // SMB Join. 
+      GenSparkUtils.getUtils().annotateMapWork(procCtx);
 
-    // finally make sure the file sink operators are set up right
-    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
-      GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);
-    }
+      // finally make sure the file sink operators are set up right
+      for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
+        GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);
+      }
 
-    // Process partition pruning sinks
-    for (Operator prunerSink : procCtx.pruningSinkSet) {
-      utils.processPartitionPruningSink(procCtx, (SparkPartitionPruningSinkOperator) prunerSink);
+      // Process partition pruning sinks
+      for (Operator prunerSink : procCtx.pruningSinkSet) {
+        utils.processPartitionPruningSink(procCtx,
+            (SparkPartitionPruningSinkOperator) prunerSink);
+      }
     }
-
-    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
   }
 
   private void generateTaskTreeHelper(GenSparkProcContext procCtx, List topNodes)
@@ -576,66 +590,69 @@ protected void decideExecMode(List> rootTasks, Context ctx,
   @Override
   protected void optimizeTaskPlan(List> rootTasks, ParseContext pCtx,
       Context ctx) throws SemanticException {
-    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
-    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
-        pCtx.getFetchTask());
+    try (PerfTimer runJobTimer = SessionState.getPerfTimer(SparkCompiler.class,
+        PerfTimedAction.SPARK_OPTIMIZE_TASK_TREE)) {
 
-    physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
+      PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx,
+          pCtx.getContext(), rootTasks, pCtx.getFetchTask());
 
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
-      (new SparkSkewJoinResolver()).resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping runtime skew join optimization");
-    }
+      physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
 
-    physicalCtx = new SparkMapJoinResolver().resolve(physicalCtx);
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
+        (new SparkSkewJoinResolver()).resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping runtime skew join optimization");
+      }
 
-    if (conf.isSparkDPPAny()) {
-      physicalCtx = new SparkDynamicPartitionPruningResolver().resolve(physicalCtx);
-    }
+      physicalCtx = new SparkMapJoinResolver().resolve(physicalCtx);
 
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
-      physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping null scan query optimization");
-    }
+      if (conf.isSparkDPPAny()) {
+        physicalCtx =
+            new SparkDynamicPartitionPruningResolver().resolve(physicalCtx);
+      }
 
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
-      physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping metadata only query optimization");
-    }
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
+        physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping null scan query optimization");
+      }
 
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
-      physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping cross product analysis");
-    }
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
+        physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping metadata only query optimization");
+      }
 
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
-      (new Vectorizer()).resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping vectorization");
-    }
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
+        physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping cross product analysis");
+      }
 
-    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
-      (new StageIDsRearranger()).resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping stage id rearranger");
-    }
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+        (new Vectorizer()).resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping vectorization");
+      }
 
-    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
-      new CombineEquivalentWorkResolver().resolve(physicalCtx);
-    } else {
-      LOG.debug("Skipping combine equivalent work optimization");
-    }
+      if (!"none".equalsIgnoreCase(
+          conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
+        (new StageIDsRearranger()).resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping stage id rearranger");
+      }
 
-    if (physicalCtx.getContext().getExplainAnalyze() != null) {
-      new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
-    }
+      if (conf.getBoolVar(
+          HiveConf.ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
+        new CombineEquivalentWorkResolver().resolve(physicalCtx);
+      } else {
+        LOG.debug("Skipping combine equivalent work optimization");
+      }
 
-    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
-    return;
+      if (physicalCtx.getContext().getExplainAnalyze() != null) {
+        new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
+      }
+    }
   }
 
   private void updateBucketingVersionForUpgrade(OptimizeSparkProcContext procCtx) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index e224f2c348..e3034cc252 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -39,6 +39,7 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.CancellationException;
@@ -88,6 +89,9 @@
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.log.PerfTimedAction;
+import org.apache.hadoop.hive.ql.log.PerfTimer;
+import org.apache.hadoop.hive.ql.log.PerfTimerFactory;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
@@ -1863,22 +1867,26 @@ public boolean isAuthorizationModeV2(){
   }
 
   /**
-   * @return Tries to return an instance of the class whose name is configured in
-   *         hive.exec.perf.logger, but if it can't it just returns an instance of
-   *         the base PerfLogger class
+   * @return Tries to return an instance of the class whose name is configured
+   *         in hive.exec.perf.logger, but if it can't it just returns an
+   *         instance of the base PerfLogger class
    *
+   * @deprecated Use {@link PerfTimer}s
    */
+  @Deprecated
   public static PerfLogger getPerfLogger() {
     return getPerfLogger(false);
   }
 
   /**
    * @param resetPerfLogger
-   * @return Tries to return an instance of the class whose name is configured in
-   *         hive.exec.perf.logger, but if it can't it just returns an instance of
-   *         the base PerfLogger class
+   * @return Tries to return an instance of the class whose name is configured
+   *         in hive.exec.perf.logger, but if it can't it just returns an
+   *         instance of the base PerfLogger class
    *
+   * @deprecated Use {@link PerfTimer}s
    */
+  @Deprecated
   public static PerfLogger getPerfLogger(boolean resetPerfLogger) {
     SessionState ss = get();
     if (ss == null) {
@@ -1888,6 +1896,22 @@ public static PerfLogger getPerfLogger(boolean resetPerfLogger) {
     }
   }
 
+  public static PerfTimer getPerfTimer(final Class clazz, final PerfTimedAction action) {
+    return getPerfTimer(clazz, action, null);
+  }
+
+  public static PerfTimer getPerfTimer(final Class clazz,
+      final PerfTimedAction action, final String extra) {
+    final Optional ss = Optional.ofNullable(get());
+    final Optional conf =
+        ss.isPresent() ? Optional.of(ss.get().getConf()) : Optional.empty();
+    final Optional sessionId =
+        ss.isPresent() ? Optional.of(ss.get().getSessionId())
+            : Optional.empty();
+    return PerfTimerFactory.getPerfTimer(sessionId, conf, clazz, action,
+        Optional.ofNullable(extra));
+  }
+
   public TezSessionState getTezSession() {
     return tezSessionState;
   }
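
Reviewer note: a minimal sketch of how a call site adopts the new timer API introduced above, following the pattern used in the SparkCompiler changes. Only SessionState.getPerfTimer, PerfTimer, and PerfTimedAction.SPARK_OPTIMIZE_TASK_TREE come from this patch; the wrapper class, method name, and the "hypothetical-extra" argument are illustrative, and it is assumed that PerfTimer.close() declares no checked exception, as its bare try-with-resources use in SparkCompiler suggests.

import org.apache.hadoop.hive.ql.log.PerfTimedAction;
import org.apache.hadoop.hive.ql.log.PerfTimer;
import org.apache.hadoop.hive.ql.session.SessionState;

// Hypothetical caller, for illustration only.
public class PerfTimerUsageSketch {

  public void timedWork() {
    // The timer starts when the PerfTimer is obtained and stops when it is
    // closed, so the explicit PerfLogBegin/PerfLogEnd pair of the old API is
    // no longer needed, and the end call cannot be forgotten on early return.
    try (PerfTimer timer = SessionState.getPerfTimer(PerfTimerUsageSketch.class,
        PerfTimedAction.SPARK_OPTIMIZE_TASK_TREE, "hypothetical-extra")) {
      // ... the work to be timed goes here ...
    }
  }
}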