diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 3141a7e981..877ca869ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -18,55 +18,19 @@ package org.apache.hadoop.hive.ql.exec; -import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; - -import java.io.BufferedWriter; -import java.io.DataOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.concurrent.ExecutionException; - +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; - +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.ListenableFuture; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; -import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -74,7 +38,6 @@ import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.PartitionDropOptions; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.TableType; @@ -149,7 +112,6 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; -import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.metadata.CheckConstraint; import org.apache.hadoop.hive.ql.metadata.CheckResult; import org.apache.hadoop.hive.ql.metadata.DefaultConstraint; @@ -211,7 +173,7 @@ import org.apache.hadoop.hive.ql.plan.FileMergeDesc; import org.apache.hadoop.hive.ql.plan.GrantDesc; import org.apache.hadoop.hive.ql.plan.GrantRevokeRoleDDL; -import 
org.apache.hadoop.hive.ql.plan.InsertTableDesc; +import org.apache.hadoop.hive.ql.plan.InsertCommitHookDesc; import org.apache.hadoop.hive.ql.plan.KillQueryDesc; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc; @@ -291,10 +253,39 @@ import org.slf4j.LoggerFactory; import org.stringtemplate.v4.ST; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ListenableFuture; +import java.io.BufferedWriter; +import java.io.DataOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.sql.SQLException; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.ExecutionException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.apache.commons.lang.StringUtils.join; +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; /** * DDLTask implementation. @@ -602,9 +593,9 @@ public int execute(DriverContext driverContext) { if (cacheMetadataDesc != null) { return cacheMetadata(db, cacheMetadataDesc); } - InsertTableDesc insertTableDesc = work.getInsertTableDesc(); - if (insertTableDesc != null) { - return insertCommitWork(db, insertTableDesc); + InsertCommitHookDesc insertCommitHookDesc = work.getInsertCommitHookDesc(); + if (insertCommitHookDesc != null) { + return insertCommitWork(db, insertCommitHookDesc); } PreInsertTableDesc preInsertTableDesc = work.getPreInsertTableDesc(); if (preInsertTableDesc != null) { @@ -860,22 +851,22 @@ private int preInsertWork(Hive db, PreInsertTableDesc preInsertTableDesc) throws return 0; } - private int insertCommitWork(Hive db, InsertTableDesc insertTableDesc) throws MetaException { + private int insertCommitWork(Hive db, InsertCommitHookDesc insertCommitHookDesc) throws MetaException { boolean failed = true; - HiveMetaHook hook = insertTableDesc.getTable().getStorageHandler().getMetaHook(); + HiveMetaHook hook = insertCommitHookDesc.getTable().getStorageHandler().getMetaHook(); if (hook == null || !(hook instanceof DefaultHiveMetaHook)) { return 0; } DefaultHiveMetaHook hiveMetaHook = (DefaultHiveMetaHook) hook; try { - hiveMetaHook.commitInsertTable(insertTableDesc.getTable().getTTable(), - insertTableDesc.isOverwrite() + hiveMetaHook.commitInsertTable(insertCommitHookDesc.getTable().getTTable(), + insertCommitHookDesc.isOverwrite() ); failed = false; } finally { if (failed) { - hiveMetaHook.rollbackInsertTable(insertTableDesc.getTable().getTTable(), - insertTableDesc.isOverwrite() + hiveMetaHook.rollbackInsertTable(insertCommitHookDesc.getTable().getTTable(), + insertCommitHookDesc.isOverwrite() ); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java index f329570cc9..240208a645 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java @@ -68,7 +68,6 @@ protected static transient Logger LOG = LoggerFactory.getLogger(Task.class); protected int taskTag; private boolean isLocalMode =false; - private boolean retryCmdWhenFail = false; public static final int NO_TAG = 0; public static final int COMMON_JOIN = 1; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java index c0ce684eb9..a91f45e204 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/QueryPlanPostProcessor.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hive.ql.optimizer; -import java.util.List; -import java.util.Set; - import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; @@ -53,6 +50,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.List; +import java.util.Set; + /** * Finds Acid FileSinkDesc objects which can be created in the physical (disconnected) plan, e.g. * {@link org.apache.hadoop.hive.ql.parse.GenTezUtils#removeUnionOperators(GenTezProcContext, BaseWork, int)} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java index 0df581ae43..f977fc1b59 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java @@ -18,16 +18,6 @@ package org.apache.hadoop.hive.ql.parse; -import java.io.Serializable; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; @@ -55,6 +45,16 @@ import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.UnionWork; +import java.io.Serializable; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + /** * GenTezProcContext. 
GenTezProcContext maintains information * about the tasks and operators as we walk the operator tree diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index d890b319e9..89121e3c8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -18,16 +18,6 @@ package org.apache.hadoop.hive.ql.parse; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.QueryProperties; @@ -61,6 +51,16 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + /** * Parse Context: The current parse context. This is passed to the optimizer * which then transforms the operator tree using the parse context. All the @@ -122,7 +122,7 @@ private MaterializedViewDesc materializedViewUpdateDesc; private boolean reduceSinkAddedBySortedDynPartition; - private Map viewProjectToViewSchema; + private Map viewProjectToViewSchema; private ColumnAccessInfo columnAccessInfo; private boolean needViewColumnAuthorization; private Set acidFileSinks = Collections.emptySet(); @@ -141,7 +141,6 @@ public ParseContext() { } /** - * @param conf * @param opToPartPruner * map from table scan operator to partition pruner * @param opToPartList diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 9e49bfac69..b6e8e17e64 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -18,33 +18,12 @@ package org.apache.hadoop.hive.ql.parse; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.Serializable; -import java.security.AccessControlException; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Deque; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Queue; -import java.util.Set; -import java.util.TreeSet; -import java.util.UUID; -import java.util.function.Supplier; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; -import java.util.stream.Collectors; - +import com.google.common.base.Splitter; +import com.google.common.base.Strings; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import com.google.common.math.IntMath; +import com.google.common.math.LongMath; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; import org.antlr.runtime.Token; @@ -65,9 +44,9 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ObjectPair; import 
org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; -import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -121,6 +100,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; @@ -185,6 +165,7 @@ import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; +import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; import org.apache.hadoop.hive.ql.plan.CreateViewDesc; @@ -203,7 +184,7 @@ import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.InsertTableDesc; +import org.apache.hadoop.hive.ql.plan.InsertCommitHookDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; @@ -230,9 +211,9 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCardinalityViolation; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash; @@ -270,12 +251,33 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.security.UserGroupInformation; -import com.google.common.base.Splitter; -import com.google.common.base.Strings; -import com.google.common.collect.Iterables; -import com.google.common.collect.Sets; -import com.google.common.math.IntMath; -import com.google.common.math.LongMath; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.Serializable; +import java.security.AccessControlException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Queue; +import java.util.Set; +import java.util.TreeSet; +import java.util.UUID; +import java.util.function.Supplier; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; + +import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; /** * Implementation of the semantic analyzer. It generates the query plan. @@ -7293,7 +7295,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) // true if it is insert overwrite. boolean overwrite = !qb.getParseInfo().isInsertIntoTable( String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName())); - createInsertDesc(dest_tab, overwrite); + createPreInsertDesc(dest_tab, overwrite); } if (dest_tab.isMaterializedView()) { @@ -7571,7 +7573,6 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) ltd.setInsertOverwrite(true); } } - if (SessionState.get().isHiveServerQuery() && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && @@ -7886,17 +7887,14 @@ private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab, return dpCtx; } - private void createInsertDesc(Table table, boolean overwrite) { - Task[] tasks = new Task[this.rootTasks.size()]; - tasks = this.rootTasks.toArray(tasks); + private void createPreInsertDesc(Table table, boolean overwrite) { PreInsertTableDesc preInsertTableDesc = new PreInsertTableDesc(table, overwrite); - InsertTableDesc insertTableDesc = new InsertTableDesc(table, overwrite); this.rootTasks .add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), preInsertTableDesc))); - TaskFactory - .getAndMakeChild(new DDLWork(getInputs(), getOutputs(), insertTableDesc), conf, tasks); + } + private void genAutoColumnStatsGatheringPipeline(QB qb, TableDesc table_desc, Map partSpec, Operator curr, boolean isInsertInto) throws SemanticException { String tableName = table_desc.getTableName(); @@ -12232,9 +12230,36 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce } //find all Acid FileSinkOperatorS QueryPlanPostProcessor qp = new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId()); + + // 10. Attach CTAS/Insert-Commit-hooks for Storage Handlers + final List<InsertCommitHookDesc> ddlCommitInsertHooks = rootTasks.stream() + .filter(task -> { + if (task.getWork() instanceof DDLWork) { + DDLWork ddlWork = (DDLWork) task.getWork(); + return ddlWork.getPreInsertTableDesc() != null; + } + return false; + }) + .filter(task -> task.getWork() instanceof DDLWork) + .map(task -> (DDLWork) task.getWork()) + .filter(ddlWork -> ddlWork.getPreInsertTableDesc() != null) + .map(ddlWork -> ddlWork.getPreInsertTableDesc()) + .map(ddlPreInsertTask -> new InsertCommitHookDesc(ddlPreInsertTask.getTable(), + ddlPreInsertTask.isOverwrite())) + .collect(Collectors.toList()); + + if (ddlCommitInsertHooks.size() > 0) { + final Map<String, InsertCommitHookDesc> insertCommitHooksMap = new HashMap<>(); + ddlCommitInsertHooks.stream() + .forEach(e -> insertCommitHooksMap.put(e.getTable().getFullyQualifiedName(), e)); + rootTasks.stream() + .filter(task -> task instanceof TezTask) + .forEach(tezTask -> attachDependency((TezTask) tezTask, insertCommitHooksMap)); + } + LOG.info("Completed plan generation"); - // 10. put accessed columns to readEntity + // 11.
put accessed columns to readEntity if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { putAccessedColumnsToReadEntity(inputs, columnAccessInfo); } @@ -12250,6 +12275,23 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce } } + private void attachDependency(TezTask tezTask, Map<String, InsertCommitHookDesc> hooksMap) { + for (BaseWork baseWork : tezTask.getWork().getAllWorkUnsorted()) { + Optional<String> tableName = baseWork.getAllLeafOperators().stream() + .filter(operator -> operator instanceof FileSinkOperator).map( + fileSinkOperator -> ((FileSinkOperator) fileSinkOperator).getConf().getTable() + .getFullyQualifiedName()) + .filter(name -> hooksMap.get(name) != null).findFirst(); + if (tableName.isPresent()) { + InsertCommitHookDesc insertCommitHookDesc = hooksMap.get(tableName.get()); + tezTask.addDependentTask( + TaskFactory.get(new DDLWork(getInputs(), getOutputs(), insertCommitHookDesc), conf)); + } + } + } + + private void putAccessedColumnsToReadEntity(HashSet<ReadEntity> inputs, ColumnAccessInfo columnAccessInfo) { Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index df1d9cb6d8..95e1c31419 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -18,21 +18,6 @@ package org.apache.hadoop.hive.ql.parse; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.hive.ql.exec.DDLTask; -import org.apache.hadoop.hive.ql.exec.MaterializedViewDesc; -import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.collect.Interner; import com.google.common.collect.Interners; import org.apache.hadoop.fs.Path; @@ -43,7 +28,9 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.exec.DDLTask; import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.MaterializedViewDesc; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -51,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -82,6 +70,18 @@ import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter; import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe; import org.apache.hadoop.mapred.InputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; /** * TaskCompiler is a the base class for classes
that compile diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java index 98da309094..8ed3b03a84 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java @@ -17,15 +17,15 @@ */ package org.apache.hadoop.hive.ql.plan; -import java.io.Serializable; -import java.util.HashSet; - import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.parse.AlterTablePartMergeFilesDesc; import org.apache.hadoop.hive.ql.parse.PreInsertTableDesc; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import java.io.Serializable; +import java.util.HashSet; + /** * DDLWork. * @@ -35,7 +35,7 @@ // TODO: this can probably be replaced with much less code via dynamic dispatch and/or templates. private PreInsertTableDesc preInsertTableDesc; - private InsertTableDesc insertTableDesc; + private InsertCommitHookDesc insertCommitHookDesc; private AlterMaterializedViewDesc alterMVDesc; private CreateDatabaseDesc createDatabaseDesc; private SwitchDatabaseDesc switchDatabaseDesc; @@ -522,9 +522,10 @@ public DDLWork(HashSet inputs, HashSet outputs, } public DDLWork(HashSet inputs, HashSet outputs, - InsertTableDesc insertTableDesc) { + InsertCommitHookDesc insertCommitHookDesc + ) { this(inputs, outputs); - this.insertTableDesc = insertTableDesc; + this.insertCommitHookDesc = insertCommitHookDesc; } public DDLWork(HashSet inputs, HashSet outputs, @@ -1241,12 +1242,12 @@ public void setShowConfDesc(ShowConfDesc showConfDesc) { } @Explain(displayName = "Insert operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public InsertTableDesc getInsertTableDesc() { - return insertTableDesc; + public InsertCommitHookDesc getInsertCommitHookDesc() { + return insertCommitHookDesc; } - public void setInsertTableDesc(InsertTableDesc insertTableDesc) { - this.insertTableDesc = insertTableDesc; + public void setInsertCommitHookDesc(InsertCommitHookDesc insertCommitHookDesc) { + this.insertCommitHookDesc = insertCommitHookDesc; } @Explain(displayName = "Pre Insert operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/InsertCommitHookDesc.java similarity index 81% rename from ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java rename to ql/src/java/org/apache/hadoop/hive/ql/plan/InsertCommitHookDesc.java index 212bc7ae74..8136506381 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/InsertTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/InsertCommitHookDesc.java @@ -20,12 +20,13 @@ import org.apache.hadoop.hive.ql.metadata.Table; -@Explain(displayName = "Insert", explainLevels = { Explain.Level.USER, Explain.Level.DEFAULT, Explain.Level.EXTENDED }) -public class InsertTableDesc extends DDLDesc { +@Explain(displayName = "Commit-Insert-Hook", explainLevels = { Explain.Level.USER, + Explain.Level.DEFAULT, Explain.Level.EXTENDED }) +public class InsertCommitHookDesc extends DDLDesc { private final Table table; private final boolean overwrite; - public InsertTableDesc(Table table, boolean overwrite) { + public InsertCommitHookDesc(Table table, boolean overwrite) { this.table = table; this.overwrite = overwrite; } diff --git ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out 
ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out index 154e5045c7..6997bd9384 100644 --- ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out @@ -323,9 +323,9 @@ SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 Stage-1 is a root stage Stage-3 is a root stage + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -336,10 +336,6 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-2 - Insert operator: - Insert - Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -389,6 +385,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.druid.serde.DruidSerDe name: default.druid_partitioned_table + Stage: Stage-5 + Insert operator: + Commit-Insert-Hook + PREHOOK: query: INSERT INTO TABLE druid_partitioned_table SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, cstring1, @@ -460,9 +460,9 @@ POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE druid_partitioned_table POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 Stage-1 is a root stage Stage-3 is a root stage + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -473,10 +473,6 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-2 - Insert operator: - Insert - Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -526,6 +522,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.druid.serde.DruidSerDe name: default.druid_partitioned_table + Stage: Stage-5 + Insert operator: + Commit-Insert-Hook + PREHOOK: query: INSERT OVERWRITE TABLE druid_partitioned_table SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, cstring1, diff --git ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index c29f547e33..f2714b5a79 100644 --- ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -422,10 +422,10 @@ ALTER MATERIALIZED VIEW cmv_mat_view2 REBUILD POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 - Stage-5 depends on stages: Stage-2, Stage-1, Stage-3 + Stage-5 depends on stages: Stage-0, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage + Stage-6 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -436,10 +436,6 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE - Stage: Stage-2 - Insert operator: - Insert - Stage: Stage-5 Materialized View Work @@ -492,6 +488,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.druid.serde.DruidSerDe name: default.cmv_mat_view2 + Stage: Stage-6 + Insert operator: + Commit-Insert-Hook + PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view2 REBUILD PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java index 095794556c..4fc913c733 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultHiveMetaHook.java @@ -23,7 +23,7 @@ public abstract class DefaultHiveMetaHook implements HiveMetaHook { /** - * Called after successfully after INSERT [OVERWRITE] statement is executed. 
+ * Called after an INSERT [OVERWRITE] statement is successfully executed. * @param table table definition * @param overwrite true if it is INSERT OVERWRITE *