diff --git ql/src/java/org/apache/hadoop/hive/ql/Context.java ql/src/java/org/apache/hadoop/hive/ql/Context.java index 9183edf..f04aed4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -25,9 +25,11 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -101,6 +103,10 @@ private TokenRewriteStream tokenRewriteStream; private final String executionId; + // Some statements, e.g., UPDATE, DELETE, or MERGE, get rewritten into different + // subqueries that create new contexts. We keep them here so we can clean them + // up when we are done. + private final Set<Context> rewrittenStatementContexts; // List of Locks for this query protected List<HiveLock> hiveLocks; @@ -254,9 +260,10 @@ public Context(Configuration conf) throws IOException { * Create a Context with a given executionId. ExecutionId, together with * user name and conf, will determine the temporary directory locations. */ - public Context(Configuration conf, String executionId) { + private Context(Configuration conf, String executionId) { this.conf = conf; this.executionId = executionId; + this.rewrittenStatementContexts = new HashSet<>(); // local & non-local tmp location is configurable. 
however it is the same across // all external file systems @@ -662,6 +669,11 @@ public void setResDir(Path resDir) { } public void clear() throws IOException { + // First clear the other contexts created by this query + for (Context subContext : rewrittenStatementContexts) { + subContext.clear(); + } + // Then clear this context if (resDir != null) { try { FileSystem fs = resDir.getFileSystem(conf); @@ -841,6 +853,10 @@ public void restoreOriginalTracker() { } } + public void addRewrittenStatementContext(Context context) { + rewrittenStatementContexts.add(context); + } + public void addCS(String path, ContentSummary cs) { pathToCS.put(path, cs); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 78c511b..1bca967 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -32,18 +32,15 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.TransactionalValidationListener; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; @@ -284,22 +281,23 @@ private void 
cleanUpMetaColumnAccessControl() { } } /** - * Parse the newly generated SQL statment to get a new AST + * Parse the newly generated SQL statement to get a new AST */ private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery) throws SemanticException { + // Set dynamic partitioning to nonstrict so that queries do not need any partition + // references. + // todo: this may be a perf issue as it prevents the optimizer.. or not + HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); // Parse the rewritten query string Context rewrittenCtx; try { - // Set dynamic partitioning to nonstrict so that queries do not need any partition - // references. - // todo: this may be a perf issue as it prevents the optimizer.. or not - HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); rewrittenCtx = new Context(conf); - rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); - rewrittenCtx.setIsUpdateDeleteMerge(true); + ctx.addRewrittenStatementContext(rewrittenCtx); } catch (IOException e) { throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg()); } + rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); + rewrittenCtx.setIsUpdateDeleteMerge(true); rewrittenCtx.setCmd(rewrittenQueryStr.toString()); ASTNode rewrittenTree;