diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java
index 286842798d..056cd27496 100644
--- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java
+++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java
@@ -699,7 +699,8 @@ public void testBucketedAcidInsertWithRemoveUnion() throws Exception {
     setupTez(confForTez);
     int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}};
     runStatementOnDriver("delete from " + Table.ACIDTBL, confForTez);
-    runStatementOnDriver("insert into " + Table.ACIDTBL + TestTxnCommands2.makeValuesClause(values));//make sure both buckets are not empty
+    //make sure both buckets are not empty
+    runStatementOnDriver("insert into " + Table.ACIDTBL + TestTxnCommands2.makeValuesClause(values), confForTez);
     runStatementOnDriver("drop table if exists T", confForTez);
     /* With bucketed target table Union All is not removed
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 3fa61d3560..398698ec06 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -576,11 +576,7 @@ public void close(boolean abort) throws IOException {
       if (options.isWritingBase()) {
        // With insert overwrite we need the empty file to delete the previous content of the table
        LOG.debug("Empty file has been created for overwrite: {}", path);
-
-        OrcFile.WriterOptions wo = OrcFile.writerOptions(this.options.getConfiguration())
-            .inspector(rowInspector)
-            .callback(new OrcRecordUpdater.KeyIndexBuilder("testEmpty"));
-        OrcFile.createWriter(path, wo).close();
+        OrcFile.createWriter(path, writerOptions).close();
       } else {
        LOG.debug("No insert events in path: {}.. Deleting..", path);
        fs.delete(path, false);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 25e9cd0482..da277d058f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -191,7 +191,7 @@ public void initialize(HiveConf hiveConf) {
       transformations.add(new FixedBucketPruningOptimizer(compatMode));
     }
 
-    if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) || pctx.hasAcidWrite()) {
+    if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
       transformations.add(new ReduceSinkDeDuplication());
     }
     transformations.add(new NonBlockingOpDeDupProc());
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
index 4e72c4c252..4208abe0fa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
@@ -28,12 +28,8 @@
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 abstract class AbstractCorrelationProcCtx implements NodeProcessorCtx {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractCorrelationProcCtx.class);
   private ParseContext pctx;
   // For queries using script, the optimization cannot be applied without user's confirmation
   // If script preserves alias and value for columns related to keys, user can set this true
@@ -49,22 +45,7 @@ public AbstractCorrelationProcCtx(ParseContext pctx) {
     removedOps = new HashSet>();
     trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
-    if(pctx.hasAcidWrite()) {
-      StringBuilder tblNames = new StringBuilder();
-      for(FileSinkDesc fsd : pctx.getAcidSinks()) {
-        if(fsd.getTable() != null) {
-          tblNames.append(fsd.getTable().getDbName()).append('.').append(fsd.getTable().getTableName()).append(',');
-        }
-      }
-      if(tblNames.length() > 0) {
-        tblNames.setLength(tblNames.length() - 1);//traling ','
-      }
-      LOG.info("Overriding " + HIVEOPTREDUCEDEDUPLICATIONMINREDUCER + " to 1 due to a write to transactional table(s) " + tblNames);
-      minReducer = 1;
-    }
-    else {
-      minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
-    }
+    minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
     isMapAggr = pctx.getConf().getBoolVar(HIVEMAPSIDEAGGREGATE);
     this.pctx = pctx;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 91bdbfd67d..bef02176c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -46,17 +46,14 @@
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 
-import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -128,7 +125,6 @@ private Map viewProjectToViewSchema;
   private ColumnAccessInfo columnAccessInfo;
   private boolean needViewColumnAuthorization;
-  private Set acidFileSinks = Collections.emptySet();
 
   private Map rsToRuntimeValuesInfo = new LinkedHashMap();
@@ -199,7 +195,7 @@ public ParseContext(
       AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc,
       CreateViewDesc createViewDesc, MaterializedViewUpdateDesc materializedViewUpdateDesc,
       QueryProperties queryProperties,
-      Map viewProjectToTableSchema, Set acidFileSinks) {
+      Map viewProjectToTableSchema) {
     this.queryState = queryState;
     this.conf = queryState.getConf();
     this.opToPartPruner = opToPartPruner;
@@ -239,17 +235,8 @@ public ParseContext(
       // authorization info.
       this.columnAccessInfo = new ColumnAccessInfo();
     }
-    if(acidFileSinks != null && !acidFileSinks.isEmpty()) {
-      this.acidFileSinks = new HashSet<>();
-      this.acidFileSinks.addAll(acidFileSinks);
-    }
-  }
-  public Set getAcidSinks() {
-    return acidFileSinks;
-  }
-  public boolean hasAcidWrite() {
-    return !acidFileSinks.isEmpty();
   }
+
   /**
    * @return the context
    */
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 02d25efa1e..2ebc4ee0b5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -557,7 +557,7 @@ public ParseContext getParseContext() {
         opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
         opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
         analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
-        queryProperties, viewProjectToTableSchema, acidFileSinks);
+        queryProperties, viewProjectToTableSchema);
   }
 
   public CompilationOpContext getOpContext() {
@@ -6859,10 +6859,15 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb,
     }
 
     if (enforceBucketing) {
+      Operation acidOp = AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(),
+          dest, AcidUtils.isInsertOnlyTable(dest_tab)) : Operation.NOT_ACID;
       int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
       if (conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS) > 0) {
         maxReducers = conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
       }
+      if (acidOp == Operation.UPDATE || acidOp == Operation.DELETE) {
+        maxReducers = 1;
+      }
       int numBuckets = dest_tab.getNumBuckets();
       if (numBuckets > maxReducers) {
         LOG.debug("numBuckets is {} and maxReducers is {}", numBuckets, maxReducers);
@@ -6877,7 +6882,7 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb,
           numFiles = totalFiles / maxReducers;
         }
       }
-      else {
+      else if (acidOp == Operation.NOT_ACID || acidOp == Operation.INSERT) {
         maxReducers = numBuckets;
       }
@@ -6889,8 +6894,7 @@
       }
 
       input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(), maxReducers,
-          (AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(),
-          dest, AcidUtils.isInsertOnlyTable(dest_tab)) : AcidUtils.Operation.NOT_ACID));
+          acidOp);
       reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0));
       ctx.setMultiFileSpray(multiFileSpray);
       ctx.setNumFiles(numFiles);
@@ -12545,7 +12549,7 @@ void analyzeInternal(final ASTNode astToAnalyze, Supplier pcf) t
         globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner,
         viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
         analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
-        queryProperties, viewProjectToTableSchema, acidFileSinks);
+        queryProperties, viewProjectToTableSchema);
 
     // Set the semijoin hints in parse context
     pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 1e1d65bcb5..2f3fc6c50a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -702,15 +702,12 @@ protected void runDynPartitionSortOptimizations(ParseContext parseContext, HiveC
         !HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
       new SortedDynPartitionOptimizer().transform(parseContext);
 
-      if(HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
-          || parseContext.hasAcidWrite()) {
-
+      if(HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
         // Dynamic sort partition adds an extra RS therefore need to de-dup
         new ReduceSinkDeDuplication().transform(parseContext);
         // there is an issue with dedup logic wherein SELECT is created with wrong columns
         // NonBlockingOpDeDupProc fixes that
         new NonBlockingOpDeDupProc().transform(parseContext);
-      }
     }
   }
@@ -732,8 +729,7 @@ public ParseContext getParseContext(ParseContext pCtx, List> rootTasks)
       pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
       pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(), pCtx.getCreateViewDesc(),
       pCtx.getMaterializedViewUpdateDesc(),
-      pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema(),
-      pCtx.getAcidSinks());
+      pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema());
   clone.setFetchTask(pCtx.getFetchTask());
   clone.setLineageInfo(pCtx.getLineageInfo());
   clone.setMapJoinOps(pCtx.getMapJoinOps());
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index ff815434f0..5a78ed5f9f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -197,8 +197,7 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs,
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
     }
 
-    if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
-        || procCtx.parseContext.hasAcidWrite()) {
+    if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
       // Dynamic sort partition adds an extra RS therefore need to de-dup
       new ReduceSinkDeDuplication().transform(procCtx.parseContext);
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 09b7ce2ca2..502696615a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -25,6 +25,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -710,13 +711,18 @@ public static ReduceSinkDesc getReduceSinkDesc(
       List outputColumnNames, boolean includeKeyCols, int tag,
       List partitionCols, String order, String nullOrder, NullOrdering defaultNullOrder,
       int numReducers, AcidUtils.Operation writeType) {
-    return getReduceSinkDesc(keyCols, keyCols.size(), valueCols,
-        new ArrayList>(),
-        includeKeyCols ? outputColumnNames.subList(0, keyCols.size()) :
-            new ArrayList(),
-        includeKeyCols ? outputColumnNames.subList(keyCols.size(),
-            outputColumnNames.size()) : outputColumnNames,
-        includeKeyCols, tag, partitionCols, order, nullOrder, defaultNullOrder, numReducers, writeType);
+    ReduceSinkDesc reduceSinkDesc = getReduceSinkDesc(keyCols, keyCols.size(), valueCols,
+        new ArrayList>(),
+        includeKeyCols ? outputColumnNames.subList(0, keyCols.size()) :
+            new ArrayList(),
+        includeKeyCols ? outputColumnNames.subList(keyCols.size(),
+            outputColumnNames.size()) : outputColumnNames,
+        includeKeyCols, tag, partitionCols, order, nullOrder, defaultNullOrder, numReducers, writeType);
+    if (writeType == AcidUtils.Operation.UPDATE || writeType == AcidUtils.Operation.DELETE) {
+      reduceSinkDesc.setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.FIXED));
+      reduceSinkDesc.setNumReducers(1);
+    }
+    return reduceSinkDesc;
   }
 
   /**
diff --git ql/src/test/queries/clientpositive/clusterctas.q ql/src/test/queries/clientpositive/clusterctas.q
new file mode 100644
index 0000000000..d4e45e0194
--- /dev/null
+++ ql/src/test/queries/clientpositive/clusterctas.q
@@ -0,0 +1,12 @@
+--! 
qt:dataset:src + +set hive.cbo.enable=false; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key; +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key; +DROP TABLE x; diff --git ql/src/test/results/clientpositive/clusterctas.q.out ql/src/test/results/clientpositive/clusterctas.q.out new file mode 100644 index 0000000000..9d76bc5903 --- /dev/null +++ ql/src/test/results/clientpositive/clusterctas.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.x + Write Type: INSERT + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + Write Type: INSERT + + Stage: Stage-4 + Create 
Table + columns: key string, value string + name: default.x + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde + table properties: + transactional true + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.x + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DROP TABLE x +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@x +PREHOOK: Output: default@x +POSTHOOK: query: DROP TABLE x +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@x +POSTHOOK: Output: default@x diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index 9f82a10431..955a07112d 100644 --- ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -1751,6 +1751,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1795,23 +1796,37 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: int), _col3 (type: decimal(5,2)), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 51750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: enforce_constraint((_col1 is not null and (_col1 >= CAST( _col0 AS decimal(5,2))) is not false)) (type: boolean) - Statistics: Num rows: 5 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - 
key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(5,2)), _col2 (type: string) + expressions: _col2 (type: int), _col3 (type: decimal(5,2)), _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string), _col2 (type: string) + null sort order: zz + sort order: ++ + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: decimal(5,2)) Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: decimal(5,2)), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint((_col1 is not null and (_col1 >= CAST( _col0 AS decimal(5,2))) is not false)) (type: boolean) + Statistics: Num rows: 5 Data size: 1470 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 1470 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(5,2)), _col2 (type: string) + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/llap/clusterctas.q.out ql/src/test/results/clientpositive/llap/clusterctas.q.out new file mode 100644 index 0000000000..40ceee215f --- /dev/null +++ ql/src/test/results/clientpositive/llap/clusterctas.q.out @@ -0,0 +1,145 @@ +PREHOOK: query: EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: EXPLAIN +CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-2 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort 
order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.x + Write Type: INSERT + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: col1, col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table + columns: key string, value string + name: default.x + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde + table properties: + transactional true + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.x + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + Write Type: INSERT + +PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS +SELECT * FROM SRC x CLUSTER BY x.key +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DROP TABLE x 
+PREHOOK: type: DROPTABLE +PREHOOK: Input: default@x +PREHOOK: Output: default@x +POSTHOOK: query: DROP TABLE x +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@x +POSTHOOK: Output: default@x diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 8ccec3a516..3f99d0c23c 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3075,7 +3075,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3117,23 +3118,37 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(_col0) (type: int), CAST( _col0 AS decimal(5,2)) (type: decimal(5,2)), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 51750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 414 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: enforce_constraint((_col1 is not null and _col2 is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 207 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 207 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(5,2)), _col2 (type: string) + expressions: UDFToInteger(_col0) (type: int), CAST( _col0 AS decimal(5,2)) (type: decimal(5,2)), _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string) Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: decimal(5,2)), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: enforce_constraint((_col1 is not null and _col2 is not null)) (type: boolean) + Statistics: Num rows: 1 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(5,2)), _col2 (type: string) + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3164,7 +3179,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 
1496 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out index 8196f11519..25ce6d6198 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out @@ -87,8 +87,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -112,7 +113,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: cmv_basetable_2_n2 @@ -183,20 +184,30 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') - mode: complete + minReductionHashAggr: 0.5 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -275,10 +286,10 @@ Table Type: MATERIALIZED_VIEW Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}} bucketing_version 2 - numFiles 1 + numFiles 2 numRows 2 rawDataSize 0 - totalSize 819 + totalSize 1539 transactional true transactional_properties default #### A masked pattern was here #### @@ -514,10 +525,10 @@ Table Type: MATERIALIZED_VIEW Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}} 
bucketing_version 2 - numFiles 1 + numFiles 2 numRows 2 rawDataSize 0 - totalSize 819 + totalSize 1539 transactional true transactional_properties default #### A masked pattern was here #### @@ -997,10 +1008,10 @@ Table Type: MATERIALIZED_VIEW Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 - numFiles 2 + numFiles 3 numRows 3 rawDataSize 0 - totalSize 1576 + totalSize 2296 transactional true transactional_properties default #### A masked pattern was here #### @@ -1081,8 +1092,8 @@ POSTHOOK: Input: default@cmv_basetable_2_n2 POSTHOOK: Input: default@cmv_basetable_n5 POSTHOOK: Input: default@cmv_mat_view_n5 #### A masked pattern was here #### -1 2 3 6 +1 2 3 2 PREHOOK: query: UPDATE cmv_basetable_2_n2 SET a=2 WHERE a=1 PREHOOK: type: QUERY @@ -1116,8 +1127,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1141,7 +1153,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: cmv_basetable_2_n2 @@ -1212,20 +1224,30 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') - mode: complete + minReductionHashAggr: 0.5 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1287,10 +1309,10 @@ Table Type: MATERIALIZED_VIEW Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}} bucketing_version 2 - numFiles 1 + numFiles 2 numRows 3 rawDataSize 0 - totalSize 822 + totalSize 1041 transactional true transactional_properties default #### A masked pattern was here 
#### @@ -1406,8 +1428,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1431,7 +1454,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: cmv_basetable_2_n2 @@ -1502,20 +1525,30 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll') - mode: complete + minReductionHashAggr: 0.5 + mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1577,10 +1610,10 @@ Table Type: MATERIALIZED_VIEW Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}} bucketing_version 2 - numFiles 1 + numFiles 2 numRows 2 rawDataSize 0 - totalSize 820 + totalSize 1039 transactional true transactional_properties default #### A masked pattern was here #### @@ -1987,10 +2020,10 @@ Table Type: MATERIALIZED_VIEW Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 - numFiles 2 + numFiles 3 numRows 3 rawDataSize 0 - totalSize 1576 + totalSize 1795 transactional true transactional_properties default #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out index fd330b02ff..4e35bba447 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out @@ -239,9 +239,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 
<- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -265,7 +266,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: tv_view_data @@ -328,21 +329,31 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll') - mode: complete + minReductionHashAggr: 0.5 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct) - outputColumnNames: _col0, _col1 + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -535,9 +546,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -561,7 +573,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: tv_view_data @@ -624,21 +636,31 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll') - mode: complete + minReductionHashAggr: 0.5 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: struct), _col1 (type: struct) - outputColumnNames: _col0, _col1 + Reduce Output Operator + null sort order: + sort order: Statistics: Num 
rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out index ae9560907f..fcae6cae9a 100644 --- ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out +++ ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out @@ -258,12 +258,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -288,7 +289,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col2 (type: double), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 10 + Map 11 Map Operator Tree: TableScan alias: l @@ -310,7 +311,7 @@ STAGE PLANS: value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: customer @@ -332,7 +333,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 8 + Map 9 Map Operator Tree: TableScan alias: lineitem @@ -359,6 +360,28 @@ STAGE PLANS: value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2849995116 Data size: 43319925835 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 300.0D) (type: boolean) + Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: 
bigint) + outputColumnNames: _col0 + Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -428,7 +451,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: double) Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -440,34 +463,48 @@ STAGE PLANS: expressions: _col4 (type: string), _col3 (type: bigint), _col2 (type: bigint), _col1 (type: string), _col0 (type: double), _col5 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 + Reduce Output Operator + key expressions: _col4 (type: double), _col3 (type: string) + null sort order: zz + sort order: -+ + Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col5 (type: double) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double), VALUE._col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: tpch_test.q18_large_volume_customer_cached - Write Type: INSERT - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: double), _col5 (type: double) - outputColumnNames: col1, col2, col3, col4, col5, col6 - Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: tpch_test.q18_large_volume_customer_cached + Write Type: INSERT + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: double), _col5 (type: double) + outputColumnNames: col1, col2, col3, 
col4, col5, col6 + Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) - Reducer 6 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -482,28 +519,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2849995116 Data size: 43319925835 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 300.0D) (type: boolean) - Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE Stage: Stage-2 Dependency Collection
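
For reviewers skimming past the golden-file churn, the behavioral core of this patch is small. Previously, any write to a transactional table force-enabled ReduceSinkDeDuplication and overrode hive.optimize.reducededuplication.min.reducer to 1 (the branches removed above in Optimizer, TezCompiler, TaskCompiler and AbstractCorrelationProcCtx). The patch drops those global overrides and instead pins parallelism only where bucketing requires it: in SemanticAnalyzer#genBucketingSortingDest and PlanUtils#getReduceSinkDesc, an UPDATE or DELETE against a bucketed transactional table now gets exactly one reducer, and its ReduceSink is tagged with the FIXED trait so later rewrites keep that count. The sketch below is a minimal standalone model of that rule, not Hive source: the Operation and ReducerTraits enums mirror AcidUtils.Operation and ReduceSinkDesc.ReducerTraits from the patch, while the Plan holder and planBucketedWrite method are hypothetical stand-ins.

import java.util.EnumSet;

// Standalone sketch of the reducer-parallelism rule introduced by this patch.
// Only the enum values and the UPDATE/DELETE special case come from the diff;
// the surrounding types are invented so the example compiles on its own.
public class AcidReducerRuleSketch {

  enum Operation { NOT_ACID, INSERT, UPDATE, DELETE }

  enum ReducerTraits { UNSET, FIXED }

  static final class Plan {
    int numReducers;
    EnumSet<ReducerTraits> traits = EnumSet.of(ReducerTraits.UNSET);
  }

  // UPDATE/DELETE on a bucketed transactional table is pinned to a single
  // FIXED reducer, matching the new checks in SemanticAnalyzer and PlanUtils;
  // INSERT and non-ACID writes keep the bucket-driven sizing (simplified here,
  // the real code also handles the multi-file-spray case when numBuckets
  // exceeds the configured reducer cap).
  static Plan planBucketedWrite(Operation acidOp, int numBuckets, int maxReducers) {
    Plan plan = new Plan();
    if (acidOp == Operation.UPDATE || acidOp == Operation.DELETE) {
      plan.numReducers = 1;
      plan.traits = EnumSet.of(ReducerTraits.FIXED);
    } else {
      plan.numReducers = Math.min(numBuckets, maxReducers);
    }
    return plan;
  }

  public static void main(String[] args) {
    System.out.println(planBucketedWrite(Operation.DELETE, 8, 32).numReducers); // 1
    System.out.println(planBucketedWrite(Operation.INSERT, 8, 32).numReducers); // 8
  }
}

The new clusterctas.q test pins down the INSERT path (a transactional ORC CTAS with CLUSTER BY), and most of the .q.out churn above is the extra reduce stage that reappears now that deduplication is no longer forced on for ACID writes.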