diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 25e9cd0482..da277d058f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -191,7 +191,7 @@ public void initialize(HiveConf hiveConf) {
       transformations.add(new FixedBucketPruningOptimizer(compatMode));
     }
 
-    if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) || pctx.hasAcidWrite()) {
+    if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
       transformations.add(new ReduceSinkDeDuplication());
     }
     transformations.add(new NonBlockingOpDeDupProc());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
index 4e72c4c252..4208abe0fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
@@ -28,12 +28,8 @@
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 abstract class AbstractCorrelationProcCtx implements NodeProcessorCtx {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractCorrelationProcCtx.class);
   private ParseContext pctx;
   // For queries using script, the optimization cannot be applied without user's confirmation
   // If script preserves alias and value for columns related to keys, user can set this true
@@ -49,22 +45,7 @@
   public AbstractCorrelationProcCtx(ParseContext pctx) {
     removedOps = new HashSet<Operator<?>>();
     trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
-    if(pctx.hasAcidWrite()) {
-      StringBuilder tblNames = new StringBuilder();
-      for(FileSinkDesc fsd : pctx.getAcidSinks()) {
-        if(fsd.getTable() != null) {
-          tblNames.append(fsd.getTable().getDbName()).append('.').append(fsd.getTable().getTableName()).append(',');
-        }
-      }
-      if(tblNames.length() > 0) {
-        tblNames.setLength(tblNames.length() - 1);//traling ','
-      }
-      LOG.info("Overriding " + HIVEOPTREDUCEDEDUPLICATIONMINREDUCER + " to 1 due to a write to transactional table(s) " + tblNames);
-      minReducer = 1;
-    }
-    else {
-      minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
-    }
+    minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
     isMapAggr = pctx.getConf().getBoolVar(HIVEMAPSIDEAGGREGATE);
     this.pctx = pctx;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 5f5d7e2ea4..2952572a1d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -128,7 +128,6 @@
   private Map<SelectOperator, Table> viewProjectToViewSchema;
   private ColumnAccessInfo columnAccessInfo;
   private boolean needViewColumnAuthorization;
-  private Set<FileSinkDesc> acidFileSinks = Collections.emptySet();
 
   private Map<ReduceSinkOperator, RuntimeValuesInfo> rsToRuntimeValuesInfo =
       new LinkedHashMap<ReduceSinkOperator, RuntimeValuesInfo>();
@@ -199,7 +198,7 @@ public ParseContext(
       AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc,
       CreateViewDesc createViewDesc, MaterializedViewUpdateDesc materializedViewUpdateDesc,
       QueryProperties queryProperties,
-      Map<SelectOperator, Table> viewProjectToTableSchema, Set<FileSinkDesc> acidFileSinks) {
+      Map<SelectOperator, Table> viewProjectToTableSchema) {
     this.queryState = queryState;
     this.conf = queryState.getConf();
     this.opToPartPruner = opToPartPruner;
@@ -239,17 +238,8 @@ public ParseContext(
       // authorization info.
       this.columnAccessInfo = new ColumnAccessInfo();
     }
-    if(acidFileSinks != null && !acidFileSinks.isEmpty()) {
-      this.acidFileSinks = new HashSet<>();
-      this.acidFileSinks.addAll(acidFileSinks);
-    }
-  }
-  public Set<FileSinkDesc> getAcidSinks() {
-    return acidFileSinks;
-  }
-  public boolean hasAcidWrite() {
-    return !acidFileSinks.isEmpty();
   }
+
   /**
    * @return the context
    */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index ff60ba54b3..5b889f7e4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -553,7 +553,7 @@ public ParseContext getParseContext() {
         opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
         opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
         analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
-        queryProperties, viewProjectToTableSchema, acidFileSinks);
+        queryProperties, viewProjectToTableSchema);
   }
 
   public CompilationOpContext getOpContext() {
@@ -12535,7 +12535,7 @@ void analyzeInternal(ASTNode ast, Supplier<ParseContext> pcf) throws SemanticException {
         globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner,
         viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
         analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
-        queryProperties, viewProjectToTableSchema, acidFileSinks);
+        queryProperties, viewProjectToTableSchema);
 
     // Set the semijoin hints in parse context
     pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 36fb93f891..4e8533bd98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -702,15 +702,12 @@ protected void runDynPartitionSortOptimizations(ParseContext parseContext, HiveConf hConf) {
         !HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
       new SortedDynPartitionOptimizer().transform(parseContext);
 
-      if(HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
-          || parseContext.hasAcidWrite()) {
-
+      if(HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
         // Dynamic sort partition adds an extra RS therefore need to de-dup
         new ReduceSinkDeDuplication().transform(parseContext);
         // there is an issue with dedup logic wherein SELECT is created with wrong columns
         // NonBlockingOpDeDupProc fixes that
         new NonBlockingOpDeDupProc().transform(parseContext);
-
       }
     }
   }
@@ -732,8 +729,7 @@ public ParseContext getParseContext(ParseContext pCtx, List<Task<?>> rootTasks) {
         pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
         pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(), pCtx.getCreateViewDesc(),
         pCtx.getMaterializedViewUpdateDesc(),
-        pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema(),
-        pCtx.getAcidSinks());
+        pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema());
     clone.setFetchTask(pCtx.getFetchTask());
     clone.setLineageInfo(pCtx.getLineageInfo());
     clone.setMapJoinOps(pCtx.getMapJoinOps());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index bf58bd8bb8..6c05ad8473 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -195,8 +195,7 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
     }
 
-    if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
-        || procCtx.parseContext.hasAcidWrite()) {
+    if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
       // Dynamic sort partition adds an extra RS therefore need to de-dup
       new ReduceSinkDeDuplication().transform(procCtx.parseContext);
diff --git a/ql/src/test/queries/clientpositive/clusterctas.q b/ql/src/test/queries/clientpositive/clusterctas.q
new file mode 100644
index 0000000000..d4e45e0194
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/clusterctas.q
@@ -0,0 +1,12 @@
+--! qt:dataset:src
+
+set hive.cbo.enable=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key;
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key;
+DROP TABLE x;
diff --git a/ql/src/test/results/clientpositive/llap/clusterctas.q.out b/ql/src/test/results/clientpositive/llap/clusterctas.q.out
new file mode 100644
index 0000000000..40ceee215f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/clusterctas.q.out
@@ -0,0 +1,145 @@
+PREHOOK: query: EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-2
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.x
+                  Write Type: INSERT
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: col1, col2
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll')
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      null sort order: 
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table
+        columns: key string, value string
+        name: default.x
+        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+        serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+        table properties:
+          transactional true
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.x
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+          Write Type: INSERT
+
+PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+POSTHOOK: Lineage: x.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE x
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@x
+PREHOOK: Output: default@x
+POSTHOOK: query: DROP TABLE x
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@x
+POSTHOOK: Output: default@x
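
Reviewer note (not part of the patch): the net effect is that ReduceSinkDeDuplication and NonBlockingOpDeDupProc no longer run unconditionally for ACID writes, and minReducer is no longer forced to 1 when a query writes to a transactional table; both now follow the ordinary config switches. A minimal session sketch of those switches (values shown are the stock defaults, used here only for illustration):

    -- HIVEOPTREDUCEDEDUPLICATION: master switch for the dedup optimizer (default true)
    SET hive.optimize.reducededuplication=true;
    -- HIVEOPTREDUCEDEDUPLICATIONMINREDUCER: dedup is skipped when merging the two
    -- ReduceSinks would leave fewer reducers than this (default 4)
    SET hive.optimize.reducededuplication.min.reducer=4;
    -- same transactional CTAS shape as the new clusterctas.q test
    SET hive.support.concurrency=true;
    SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
    CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
    SELECT * FROM src x CLUSTER BY x.key;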