diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b01ebd8..d4ea20a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -474,6 +474,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   cbo_rp_unionDistinct_2.q,\
   cbo_rp_windowing_2.q,\
   cbo_subq_not_in.q,\
+  column_table_stats.q,\
   constprog_dpp.q,\
   current_date_timestamp.q,\
   correlationoptimizer1.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 0e67ea6..216a5d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -747,6 +747,10 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set<ReadEntity> p
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
      }
    }

+    public TableSpec(Table tableHandle, List<Partition> partitions)
+        throws HiveException {
+      this.tableHandle = tableHandle;
+      this.tableName = tableHandle.getTableName();
+      if (partitions != null && !partitions.isEmpty()) {
+        this.specType = SpecType.STATIC_PARTITION;
+        this.partitions = partitions;
+        List<FieldSchema> partCols = this.tableHandle.getPartCols();
+        this.partSpec = new LinkedHashMap<>();
+        for (FieldSchema partCol : partCols) {
+          partSpec.put(partCol.getName(), null);
+        }
+      } else {
+        this.specType = SpecType.TABLE_ONLY;
+      }
+    }
+
     public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
         boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
@@ -1155,6 +1172,7 @@ public String toString() {
     private List<String> colName;
     private List<String> colType;
     private boolean tblLvl;
+    Map<String, String> partSpec;
 
     public String getTableName() {
@@ -1188,6 +1206,14 @@ public void setTblLvl(boolean isTblLvl) {
     public void setColType(List<String> colType) {
       this.colType = colType;
     }
+
+    public Map<String, String> getPartSpec() {
+      return partSpec;
+    }
+
+    public void setPartSpec(Map<String, String> partSpec) {
+      this.partSpec = partSpec;
+    }
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index 93b8183..246ea38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -356,13 +356,13 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
      * command - table stats are collected by the table scan operator and is not rewritten to
      * an aggregation.
      */
+    Map<String, String> partSpec = null;
     if (shouldRewrite(ast)) {
       tbl = AnalyzeCommandUtils.getTable(ast, this);
       colNames = getColumnName(ast);
       // Save away the original AST
       originalTree = ast;
       boolean isPartitionStats = AnalyzeCommandUtils.isPartitionLevelStats(ast);
-      Map<String, String> partSpec = null;
       checkForPartitionColumns(
           colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
       validateSpecifiedColumnNames(colNames);
@@ -403,6 +403,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
       analyzeRewrite.setTblLvl(isTableLevel);
       analyzeRewrite.setColName(colNames);
       analyzeRewrite.setColType(colType);
+      analyzeRewrite.setPartSpec(partSpec);
       qbp.setAnalyzeRewrite(analyzeRewrite);
       initCtx(ctx);
       ctx.setExplainConfig(origCtx.getExplainConfig());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
index c13a404..c374d9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.parse;
 
+import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 import java.util.Stack;
@@ -30,14 +32,17 @@
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;
 import org.apache.hadoop.hive.ql.plan.StatsWork;
@@ -77,19 +82,20 @@ public Object process(Node nd, Stack<Node> stack,
     Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata()
         .getInputFormatClass();
 
-    if (parseContext.getQueryProperties().isAnalyzeCommand()) {
-
-      assert tableScan.getChildOperators() == null
-          || tableScan.getChildOperators().size() == 0;
+    if (parseContext.getAnalyzeRewrite() == null) {
+      assert tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0;
+    }
 
-      String alias = null;
-      for (String a: parseContext.getTopOps().keySet()) {
-        if (tableScan == parseContext.getTopOps().get(a)) {
-          alias = a;
-        }
+    String alias = null;
+    for (String a: parseContext.getTopOps().keySet()) {
+      if (tableScan == parseContext.getTopOps().get(a)) {
+        alias = a;
       }
+    }
+
+    assert alias != null;
+    if (parseContext.getQueryProperties().isAnalyzeCommand()) {
-      assert alias != null;
       TezWork tezWork = context.currentTask.getWork();
 
       if (inputFormat.equals(OrcInputFormat.class)) {
@@ -160,10 +166,61 @@ public Object process(Node nd, Stack<Node> stack,
           return true;
         }
       }
+    else if (parseContext.getAnalyzeRewrite() != null) {
+      // We need to collect table stats while collecting column stats.
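+      // By this point the FOR COLUMNS statement has already been rewritten
+      // into a plain select/group-by query, so the current Tez task only
+      // computes column statistics; genTableStats() below attaches a
+      // dependent task that persists the basic table/partition stats
+      // gathered by the table scan operator.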
+      try {
+        context.currentTask.addDependentTask(genTableStats(context, alias, tableScan));
+      } catch (HiveException e) {
+        throw new SemanticException(e);
+      }
+    }
 
     return null;
   }
 
+  private Task<? extends Serializable> genTableStats(GenTezProcContext context, String alias,
+      TableScanOperator tableScan) throws HiveException {
+    Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata()
+        .getInputFormatClass();
+    ParseContext parseContext = context.parseContext;
+    Table table = tableScan.getConf().getTableMetadata();
+    List<Partition> partitions = new ArrayList<>();
+    if (table.isPartitioned()) {
+      partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
+      for (Partition partn : partitions) {
+        LOG.debug("Adding partition " + partn + " as an output for table stats collection");
+        context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
+      }
+    }
+    TableSpec tableSpec = new TableSpec(table, partitions);
+    tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
+
+    if (inputFormat.equals(OrcInputFormat.class)) {
+      // For ORC, basic stats can be collected without an extra Tez job:
+      // the stats-no-job task reads them directly from the file footers.
+      StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata()
+          .getTableSpec());
+      snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      // If a partition spec was given, pass the pruned partition list along.
+      if (partitions.size() > 0) {
+        snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
+      }
+      return TaskFactory.get(snjWork, parseContext.getConf());
+    } else {
+      // For other formats, the table scan of the rewritten column-stats query
+      // gathers the basic stats, and a dependent StatsTask aggregates and
+      // persists them, as for ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS.
+      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
+      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+      statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
+      statsWork.setSourceTask(context.currentTask);
+      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      return TaskFactory.get(statsWork, parseContext.getConf());
+    }
+  }
 
   /**
    * handle partial scan command.
    *
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 0872e53..fc16d8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10256,7 +10256,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       RowResolver rwsch) throws SemanticException {
 
-    if (!qbp.isAnalyzeCommand()) {
+    if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
       tsDesc.setGatherStats(false);
     } else {
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
@@ -10279,15 +10279,6 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       tsDesc.addVirtualCols(vcList);
 
       String tblName = tab.getTableName();
-      TableSpec tblSpec = qbp.getTableSpec(alias);
-      Map<String, String> partSpec = tblSpec.getPartSpec();
-
-      if (partSpec != null) {
-        List<String> cols = new ArrayList<String>();
-        cols.addAll(partSpec.keySet());
-        tsDesc.setPartColumns(cols);
-      }
-
       // Theoretically the key prefix could be any unique string shared
       // between TableScanOperator (when publishing) and StatsTask (when aggregating).
      // Here we use
@@ -10296,13 +10287,27 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       // Currently, partition spec can only be static partition.
       String k = MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
       tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k);
-
+
       // set up WriteEntity for replication
       outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
 
       // add WriteEntity for each matching partition
       if (tab.isPartitioned()) {
-        if (partSpec == null) {
+        List<String> cols = new ArrayList<String>();
+        if (qbp.getAnalyzeRewrite() != null) {
+          // Column stats rewrite: gather stats for all partition columns.
+          List<FieldSchema> partitionCols = tab.getPartCols();
+          for (FieldSchema fs : partitionCols) {
+            cols.add(fs.getName());
+          }
+          tsDesc.setPartColumns(cols);
+          return;
+        }
+        TableSpec tblSpec = qbp.getTableSpec(alias);
+        Map<String, String> partSpec = tblSpec.getPartSpec();
+        if (partSpec != null) {
+          cols.addAll(partSpec.keySet());
+          tsDesc.setPartColumns(cols);
+        } else {
           throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
         }
         List<Partition> partitions = qbp.getTableSpec().partitions;
diff --git a/ql/src/test/queries/clientpositive/column_table_stats.q b/ql/src/test/queries/clientpositive/column_table_stats.q
new file mode 100644
index 0000000..5a54855
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats.q
@@ -0,0 +1,65 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart partition(hr="11") compute statistics for columns;
+
+analyze table spart partition(hr="11") compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
diff --git a/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
index ceacc24..09dacae 100644
--- a/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
+++ b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
@@ -1,5 +1,7 @@
 set hive.exec.parallel=true;
 
-explain analyze table src compute statistics for columns; +create table s like src; -analyze table src compute statistics for columns; \ No newline at end of file +explain analyze table s compute statistics for columns; + +analyze table s compute statistics for columns; diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index a3c4f1a..c9098ce 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -35,10 +35,12 @@ POSTHOOK: Output: default@src_stat_int PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat +PREHOOK: Output: default@src_stat #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat +POSTHOOK: Output: default@src_stat #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat key PREHOOK: type: DESCTABLE @@ -78,10 +80,12 @@ value string PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_int +PREHOOK: Output: default@src_stat_int #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_int +POSTHOOK: Output: default@src_stat_int #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_int key PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/analyze_tbl_date.q.out b/ql/src/test/results/clientpositive/analyze_tbl_date.q.out index a0cdbca..eac13cc 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_date.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_date.q.out @@ -16,10 +16,12 @@ POSTHOOK: Lineage: test_table.d EXPRESSION [(values__tmp__table__1)values__tmp__ PREHOOK: query: analyze table test_table compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@test_table +PREHOOK: Output: default@test_table #### A masked pattern was here #### POSTHOOK: query: analyze table test_table compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table +POSTHOOK: Output: default@test_table #### A masked pattern was here #### PREHOOK: query: describe formatted test_table PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index ed90b6f..cae6973 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -35,12 +35,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part PREHOOK: Input: default@src_stat_part@partitionid=1 PREHOOK: Input: default@src_stat_part@partitionid=2 +PREHOOK: Output: default@src_stat_part +PREHOOK: Output: default@src_stat_part@partitionid=1 +PREHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part POSTHOOK: Input: default@src_stat_part@partitionid=1 POSTHOOK: Input: default@src_stat_part@partitionid=2 +POSTHOOK: Output: default@src_stat_part +POSTHOOK: Output: 
default@src_stat_part@partitionid=1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE @@ -56,12 +62,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part PREHOOK: Input: default@src_stat_part@partitionid=1 PREHOOK: Input: default@src_stat_part@partitionid=2 +PREHOOK: Output: default@src_stat_part +PREHOOK: Output: default@src_stat_part@partitionid=1 +PREHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part POSTHOOK: Input: default@src_stat_part@partitionid=1 POSTHOOK: Input: default@src_stat_part@partitionid=2 +POSTHOOK: Output: default@src_stat_part +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index 32644dc..1b9efc2 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -59,10 +59,12 @@ POSTHOOK: Output: default@over1k PREHOOK: query: analyze table over1k compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1k #### A masked pattern was here #### POSTHOOK: query: analyze table over1k compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1k #### A masked pattern was here #### PREHOOK: query: explain select count(*) from over1k where ( (t=1 and si=2) diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index e22c3ef..aeb9719 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -112,10 +112,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc where state='OH' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854..95017f8 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -76,10 +76,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: 
Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c @@ -177,10 +179,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select year from loc_orc group by year PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed1..d95af92 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -29,10 +29,12 @@ POSTHOOK: Output: default@location PREHOOK: query: analyze table location compute statistics for columns state, country PREHOOK: type: QUERY PREHOOK: Input: default@location +PREHOOK: Output: default@location #### A masked pattern was here #### POSTHOOK: query: analyze table location compute statistics for columns state, country POSTHOOK: type: QUERY POSTHOOK: Input: default@location +POSTHOOK: Output: default@location #### A masked pattern was here #### PREHOOK: query: explain select state, country from location group by state, country PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index 5d4fe6c..685088d 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -97,26 +97,32 @@ POSTHOOK: Output: default@loc PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid PREHOOK: type: QUERY PREHOOK: Input: default@emp +PREHOOK: Output: default@emp #### A masked pattern was here #### POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp #### A masked pattern was here #### PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@dept +PREHOOK: Output: default@dept #### A masked pattern was here #### POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept #### A masked pattern was here #### PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc +PREHOOK: Output: default@loc #### A masked pattern was here #### POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc #### A masked pattern was here #### PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index b4d46d2..e74554d 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ 
b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -297,10 +297,12 @@ POSTHOOK: Output: default@store PREHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space PREHOOK: type: QUERY PREHOOK: Input: default@store +PREHOOK: Output: default@store #### A masked pattern was here #### POSTHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space POSTHOOK: type: QUERY POSTHOOK: Input: default@store +POSTHOOK: Output: default@store #### A masked pattern was here #### PREHOOK: query: analyze table store_bigint compute statistics PREHOOK: type: QUERY @@ -313,10 +315,12 @@ POSTHOOK: Output: default@store_bigint PREHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space PREHOOK: type: QUERY PREHOOK: Input: default@store_bigint +PREHOOK: Output: default@store_bigint #### A masked pattern was here #### POSTHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space POSTHOOK: type: QUERY POSTHOOK: Input: default@store_bigint +POSTHOOK: Output: default@store_bigint #### A masked pattern was here #### PREHOOK: query: analyze table store_sales compute statistics PREHOOK: type: QUERY @@ -329,10 +333,12 @@ POSTHOOK: Output: default@store_sales PREHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity PREHOOK: type: QUERY PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales #### A masked pattern was here #### POSTHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales #### A masked pattern was here #### PREHOOK: query: analyze table customer_address compute statistics PREHOOK: type: QUERY @@ -345,10 +351,12 @@ POSTHOOK: Output: default@customer_address PREHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk PREHOOK: type: QUERY PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address #### A masked pattern was here #### POSTHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk POSTHOOK: type: QUERY POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address #### A masked pattern was here #### PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out index ea181cb..5139db4 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out 
b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index 873f1ab..af83385 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -113,10 +113,12 @@ STAGE PLANS: PREHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 PREHOOK: type: QUERY PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc #### A masked pattern was here #### POSTHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc #### A masked pattern was here #### PREHOOK: query: explain select * from alltypes_orc PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out index efc3c1f..d8c0dfd 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -120,10 +120,12 @@ STAGE PLANS: PREHOOK: query: analyze table emp_orc compute statistics for columns deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc #### A masked pattern was here #### PREHOOK: query: explain select * from emp_orc PREHOOK: type: QUERY @@ -170,10 +172,12 @@ STAGE PLANS: PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc #### A masked pattern was here #### PREHOOK: query: explain select * from emp_orc PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_union.q.out b/ql/src/test/results/clientpositive/annotate_stats_union.q.out index 059f261..3b4b169 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select state from loc_orc PREHOOK: type: QUERY @@ -282,18 +284,22 @@ PREHOOK: query: analyze table loc_staging compute statistics for columns state,l PREHOOK: type: QUERY PREHOOK: Input: test@loc_staging #### A masked pattern was here #### +PREHOOK: Output: test@loc_staging POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_staging #### A masked pattern was here #### +POSTHOOK: Output: test@loc_staging PREHOOK: query: analyze 
table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: test@loc_orc #### A masked pattern was here #### +PREHOOK: Output: test@loc_orc POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_orc #### A masked pattern was here #### +POSTHOOK: Output: test@loc_orc PREHOOK: query: explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp diff --git a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out index dca158b..efdbdd6 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out @@ -13,10 +13,12 @@ POSTHOOK: Output: default@src_multi1 PREHOOK: query: analyze table src_multi1 compute statistics for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_multi1 +PREHOOK: Output: default@src_multi1 #### A masked pattern was here #### POSTHOOK: query: analyze table src_multi1 compute statistics for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1 +POSTHOOK: Output: default@src_multi1 #### A masked pattern was here #### PREHOOK: query: describe formatted src_multi1 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out index 64e65ca..11aaa15 100644 --- a/ql/src/test/results/clientpositive/avro_decimal.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal.q.out @@ -21,10 +21,12 @@ POSTHOOK: Output: default@dec PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value PREHOOK: type: QUERY PREHOOK: Input: default@dec +PREHOOK: Output: default@dec #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value POSTHOOK: type: QUERY POSTHOOK: Input: default@dec +POSTHOOK: Output: default@dec #### A masked pattern was here #### PREHOOK: query: DESC FORMATTED dec value PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out index cebc342..1fd98fa 100644 --- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out @@ -25,10 +25,12 @@ POSTHOOK: Output: default@dec PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value PREHOOK: type: QUERY PREHOOK: Input: default@dec +PREHOOK: Output: default@dec #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value POSTHOOK: type: QUERY POSTHOOK: Input: default@dec +POSTHOOK: Output: default@dec #### A masked pattern was here #### PREHOOK: query: DESC FORMATTED dec value PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f03..045b0a9 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -76,10 +76,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc 
+PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c @@ -177,10 +179,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select year from loc_orc group by year PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index fe4bc4f..96f2d96 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -49,10 +49,12 @@ POSTHOOK: Output: default@tbl1 PREHOOK: query: analyze table tbl1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 +PREHOOK: Output: default@tbl1 #### A masked pattern was here #### POSTHOOK: query: analyze table tbl1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 +POSTHOOK: Output: default@tbl1 #### A masked pattern was here #### PREHOOK: query: analyze table tbl2 compute statistics PREHOOK: type: QUERY @@ -65,10 +67,12 @@ POSTHOOK: Output: default@tbl2 PREHOOK: query: analyze table tbl2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@tbl2 #### A masked pattern was here #### POSTHOOK: query: analyze table tbl2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@tbl2 #### A masked pattern was here #### PREHOOK: query: explain select count(*) from ( diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out index f67f81b..b0a941a 100644 --- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -30,10 +30,12 @@ POSTHOOK: Lineage: all_nulls.c SIMPLE [] PREHOOK: query: analyze table all_nulls compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@all_nulls +PREHOOK: Output: default@all_nulls #### A masked pattern was here #### POSTHOOK: query: analyze table all_nulls compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@all_nulls +POSTHOOK: Output: default@all_nulls #### A masked pattern was here #### PREHOOK: query: describe formatted all_nulls a PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index af21343..8449f7e 100644 --- a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -56,10 +56,12 @@ Storage Desc Params: PREHOOK: query: analyze table calendar compute statistics for columns year PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: 
default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns year POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 5c9f901..5b9cc37 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -529,7 +529,8 @@ analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -580,17 +581,26 @@ STAGE PLANS: Column Types: int, string Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID PREHOOK: type: DESCTABLE @@ -618,7 +628,8 @@ analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -626,11 +637,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int), employeename (type: string) outputColumnNames: employeeid, employeename - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) mode: hash @@ -661,17 +672,26 @@ STAGE PLANS: Column Types: int, string Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: 
default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part employeeID PREHOOK: type: DESCTABLE @@ -718,12 +738,19 @@ PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### +Cannot get table employee_part POSTHOOK: query: analyze table default.Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 08fb42e..948ad86 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -241,7 +241,8 @@ analyze table Employee_Part partition (employeeSalary) compute statistics for co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -292,6 +293,9 @@ STAGE PLANS: Column Types: int Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID PREHOOK: type: QUERY PREHOOK: Input: default@employee_part @@ -301,6 +305,13 @@ PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID POSTHOOK: type: QUERY @@ -311,6 +322,13 @@ POSTHOOK: Input: 
default@employee_part@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='3000.0', country='UK') employeeID PREHOOK: type: DESCTABLE @@ -329,7 +347,8 @@ analyze table Employee_Part partition (employeeSalary,country) compute statistic POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -337,22 +356,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeesalary (type: double), country (type: string), employeeid (type: int), employeename (type: string) outputColumnNames: employeesalary, country, employeeid, employeename - Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) keys: employeesalary (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator @@ -360,14 +379,14 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -380,6 +399,9 @@ STAGE PLANS: Column Types: int, string Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part @@ -389,6 +411,13 @@ PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns POSTHOOK: type: QUERY @@ -399,6 +428,13 @@ POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='3500.0', country='UK') employeeName PREHOOK: type: DESCTABLE @@ -466,6 +502,11 @@ PREHOOK: Input: default@employee@employeesalary=2000.0/country=UK PREHOOK: Input: default@employee@employeesalary=2000.0/country=USA PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=3500.0/country=UK #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary,country) compute statistics for columns POSTHOOK: type: QUERY @@ -474,6 +515,11 @@ POSTHOOK: Input: default@employee@employeesalary=2000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=2000.0/country=USA POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK +POSTHOOK: Output: 
default@employee@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='3500.0', country='UK') employeeName PREHOOK: type: DESCTABLE @@ -511,6 +557,13 @@ PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary) compute statistics for columns POSTHOOK: type: QUERY @@ -521,6 +574,13 @@ POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='3000.0', country='USA') employeeName PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out index 52e3538..7654897 100644 --- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out +++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out @@ -16,7 +16,8 @@ POSTHOOK: query: explain analyze table user_web_events compute statistics for co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -59,13 +60,18 @@ STAGE PLANS: Column Types: bigint, string Table: default.user_web_events + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table user_web_events compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events #### A masked pattern was here #### POSTHOOK: query: analyze table user_web_events compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events #### A masked pattern was here #### PREHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` PREHOOK: type: QUERY @@ -73,7 +79,8 @@ POSTHOOK: query: explain analyze table user_web_events compute statistics for 
co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -116,11 +123,16 @@ STAGE PLANS: Column Types: bigint Table: default.user_web_events + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table user_web_events compute statistics for columns `user id` PREHOOK: type: QUERY PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events #### A masked pattern was here #### POSTHOOK: query: analyze table user_web_events compute statistics for columns `user id` POSTHOOK: type: QUERY POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 60652e0..965315d 100644 --- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -46,7 +46,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -89,6 +90,9 @@ STAGE PLANS: Column Types: string, int, float Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -97,7 +101,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -106,7 +111,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -212,13 +218,19 @@ STAGE PLANS: Table: default.uservisits_web_text_none Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ + PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: explain analyze table default.UserVisits_web_text_none compute statistics for columns @@ -228,7 +240,8 @@ analyze table default.UserVisits_web_text_none compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on 
stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -236,11 +249,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: uservisits_web_text_none - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(sourceip, 16), compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16) mode: hash @@ -271,13 +284,18 @@ STAGE PLANS: Column Types: string, string, string, float, string, string, string, string, int Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: describe formatted UserVisits_web_text_none destURL PREHOOK: type: DESCTABLE @@ -334,7 +352,8 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -377,13 +396,18 @@ STAGE PLANS: Column Types: int, double, string, boolean, binary Table: default.empty_tab + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE @@ -400,10 +424,13 @@ POSTHOOK: Input: database:dummydb PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### +Cannot get table uservisits_web_text_none POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### 
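
The hunks above all repeat one pattern: the explain plans gain a "Stage-2: Stats-Aggr Operator" alongside the Column Stats Work stage, the TableScan flips to "GatherStats: true" with a Statistics Aggregation Key Prefix, and the analyzed table is now registered as a PREHOOK/POSTHOOK Output rather than an Input only. A minimal sketch of that flow, stitched together from the statements visible in column_table_stats.q.out further below — illustrative only, not a verbatim test:

    -- After this patch, a column-stats ANALYZE also gathers basic table stats.
    CREATE TABLE s (key STRING, value STRING) STORED AS TEXTFILE;
    LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;

    EXPLAIN ANALYZE TABLE s COMPUTE STATISTICS FOR COLUMNS;
    -- plan now shows "Stage-2: Stats-Aggr Operator" in addition to Column Stats Work

    ANALYZE TABLE s COMPUTE STATISTICS FOR COLUMNS;
    -- default@s appears under PREHOOK/POSTHOOK Output as well as Input

    DESC FORMATTED s;
    -- Table Parameters report numRows 500 / rawDataSize 5312 for kv1.txt,
    -- where they previously stayed at 0 until a separate basic-stats ANALYZE ran
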
PREHOOK: query: describe formatted default.UserVisits_web_text_none destURL PREHOOK: type: DESCTABLE @@ -464,7 +491,8 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sour POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -507,6 +535,9 @@ STAGE PLANS: Column Types: string, int, float Table: dummydb.uservisits_in_dummy_db + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -515,7 +546,8 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sour POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -524,7 +556,8 @@ STAGE PLANS: TableScan alias: uservisits_in_dummy_db Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -630,13 +663,20 @@ STAGE PLANS: Table: dummydb.uservisits_in_dummy_db Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### +Cannot get table uservisits_in_dummy_db POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### PREHOOK: query: explain analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns @@ -646,7 +686,8 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -654,11 +695,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: uservisits_in_dummy_db - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(sourceip, 16), 
compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16) mode: hash @@ -689,13 +730,19 @@ STAGE PLANS: Column Types: string, string, string, float, string, string, string, string, int Table: dummydb.uservisits_in_dummy_db + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### +Cannot get table uservisits_in_dummy_db POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/compustat_avro.q.out b/ql/src/test/results/clientpositive/compustat_avro.q.out index 50c03bd..00c6938 100644 --- a/ql/src/test/results/clientpositive/compustat_avro.q.out +++ b/ql/src/test/results/clientpositive/compustat_avro.q.out @@ -36,10 +36,12 @@ col1 string PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3 PREHOOK: type: QUERY PREHOOK: Input: default@testavro +PREHOOK: Output: default@testavro #### A masked pattern was here #### POSTHOOK: query: analyze table testAvro compute statistics for columns col1,col3 POSTHOOK: type: QUERY POSTHOOK: Input: default@testavro +POSTHOOK: Output: default@testavro #### A masked pattern was here #### PREHOOK: query: describe formatted testAvro col1 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out index 73f08ad..407f984 100644 --- a/ql/src/test/results/clientpositive/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -52,7 +52,8 @@ analyze table tab_date compute statistics for columns fl_date POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -95,13 +96,18 @@ STAGE PLANS: Column Types: date Table: default.tab_date + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table tab_date compute statistics for columns fl_date PREHOOK: type: QUERY PREHOOK: Input: default@tab_date +PREHOOK: Output: default@tab_date #### A masked pattern was here #### POSTHOOK: query: analyze table tab_date compute statistics for columns fl_date POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_date +POSTHOOK: Output: default@tab_date #### A masked pattern was here #### PREHOOK: query: describe formatted tab_date fl_date PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/constGby.q.out b/ql/src/test/results/clientpositive/constGby.q.out index fd8ecc2..ac28f6c 100644 --- a/ql/src/test/results/clientpositive/constGby.q.out +++ b/ql/src/test/results/clientpositive/constGby.q.out @@ -17,10 +17,12 @@ POSTHOOK: Output: default@t1 PREHOOK: query: analyze table t1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### 
POSTHOOK: query: analyze table t1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: explain select count(1) from t1 group by 1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out index ecf6e57..41e7bea 100644 --- a/ql/src/test/results/clientpositive/constant_prop_3.q.out +++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -51,10 +51,12 @@ POSTHOOK: Output: default@part_hive PREHOOK: query: analyze table part_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@part_hive +PREHOOK: Output: default@part_hive #### A masked pattern was here #### POSTHOOK: query: analyze table part_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@part_hive +POSTHOOK: Output: default@part_hive #### A masked pattern was here #### PREHOOK: query: analyze table partsupp_hive compute statistics PREHOOK: type: QUERY @@ -67,10 +69,12 @@ POSTHOOK: Output: default@partsupp_hive PREHOOK: query: analyze table partsupp_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partsupp_hive +PREHOOK: Output: default@partsupp_hive #### A masked pattern was here #### POSTHOOK: query: analyze table partsupp_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@partsupp_hive +POSTHOOK: Output: default@partsupp_hive #### A masked pattern was here #### PREHOOK: query: analyze table supplier_hive compute statistics PREHOOK: type: QUERY @@ -83,10 +87,12 @@ POSTHOOK: Output: default@supplier_hive PREHOOK: query: analyze table supplier_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@supplier_hive +PREHOOK: Output: default@supplier_hive #### A masked pattern was here #### POSTHOOK: query: analyze table supplier_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@supplier_hive +POSTHOOK: Output: default@supplier_hive #### A masked pattern was here #### Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out index cb02f76..38240ac 100644 --- a/ql/src/test/results/clientpositive/decimal_stats.q.out +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -35,10 +35,12 @@ POSTHOOK: Lineage: decimal_1.v EXPRESSION [] PREHOOK: query: analyze table decimal_1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 #### A masked pattern was here #### POSTHOOK: query: analyze table decimal_1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 #### A masked pattern was here #### PREHOOK: query: desc formatted decimal_1 v PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 4382522..ddb6596 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -75,18 +75,22 @@ amount decimal(10,3) PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: 
default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: explain select s.id, diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index e10edb4..1756317 100644 --- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -62,7 +62,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -105,6 +106,9 @@ STAGE PLANS: Column Types: string, int, float Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -113,7 +117,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -122,7 +127,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -228,13 +234,19 @@ STAGE PLANS: Table: default.uservisits_web_text_none Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ + PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE @@ -300,7 +312,8 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -343,13 +356,18 @@ 
STAGE PLANS: Column Types: int, double, string, boolean, binary Table: default.empty_tab + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: desc formatted empty_tab a PREHOOK: type: DESCTABLE @@ -469,10 +487,12 @@ PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for co PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +PREHOOK: Output: test@uservisits_web_text_none POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword POSTHOOK: type: QUERY POSTHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +POSTHOOK: Output: test@uservisits_web_text_none PREHOOK: query: desc extended UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none diff --git a/ql/src/test/results/clientpositive/distinct_stats.q.out b/ql/src/test/results/clientpositive/distinct_stats.q.out index 73b4add..bc2ab02 100644 --- a/ql/src/test/results/clientpositive/distinct_stats.q.out +++ b/ql/src/test/results/clientpositive/distinct_stats.q.out @@ -19,10 +19,12 @@ POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, co PREHOOK: query: analyze table t1 compute statistics for columns a,b PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns a,b POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: explain select count(distinct b) from t1 group by a diff --git a/ql/src/test/results/clientpositive/drop_table_with_stats.q.out b/ql/src/test/results/clientpositive/drop_table_with_stats.q.out index 52aa10a..119f74a 100644 --- a/ql/src/test/results/clientpositive/drop_table_with_stats.q.out +++ b/ql/src/test/results/clientpositive/drop_table_with_stats.q.out @@ -30,10 +30,12 @@ PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:tblstatsdb1 @@ -54,10 +56,12 @@ PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable1 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable1 PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: 
database:tblstatsdb1 @@ -78,10 +82,12 @@ PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable2 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable2 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable2 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable2 PREHOOK: query: DROP TABLE tblstatsdb1.testtable PREHOOK: type: DROPTABLE PREHOOK: Input: tblstatsdb1@testtable @@ -146,10 +152,12 @@ PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: TBLSTATSDB2@TestTable1 @@ -170,10 +178,12 @@ PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable1 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable1 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable1 PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: TBLSTATSDB2@TESTTABLE2 @@ -194,10 +204,12 @@ PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable2 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable2 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable2 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable2 PREHOOK: query: DROP TABLE TBLSTATSDB2.testtable PREHOOK: type: DROPTABLE PREHOOK: Input: tblstatsdb2@testtable diff --git a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out index f256ec1..23517b0 100644 --- a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out +++ b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out @@ -1,22 +1,31 @@ -PREHOOK: query: explain analyze table src compute statistics for columns +PREHOOK: query: create table s like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@s +POSTHOOK: query: create table s like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s +PREHOOK: query: explain analyze table s compute statistics for columns PREHOOK: type: QUERY -POSTHOOK: query: explain analyze table src compute statistics for columns +POSTHOOK: query: explain analyze table s compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: s + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: compute_stats(key, 16), compute_stats(value, 16) mode: hash @@ -45,13 +54,18 @@ STAGE PLANS: Column Stats Desc: Columns: key, value Column Types: string, string - Table: default.src + Table: default.s -PREHOOK: query: analyze table src compute statistics for columns + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: analyze table s compute statistics for columns PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@s +PREHOOK: Output: default@s #### A masked pattern was here #### -POSTHOOK: query: analyze table src compute statistics for columns +POSTHOOK: query: analyze table s compute statistics for columns POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@s +POSTHOOK: Output: default@s #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index b12d3a1..5f42737 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -33,6 +33,10 @@ PREHOOK: Input: default@date_dim PREHOOK: Input: default@date_dim@d_date_sk=2416945 PREHOOK: Input: default@date_dim@d_date_sk=2416946 PREHOOK: Input: default@date_dim@d_date_sk=2416947 +PREHOOK: Output: default@date_dim +PREHOOK: Output: default@date_dim@d_date_sk=2416945 +PREHOOK: Output: default@date_dim@d_date_sk=2416946 +PREHOOK: Output: default@date_dim@d_date_sk=2416947 #### A masked pattern was here #### POSTHOOK: query: analyze table date_dim partition(d_date_sk) compute statistics for columns POSTHOOK: type: QUERY @@ -40,6 +44,10 @@ POSTHOOK: Input: default@date_dim POSTHOOK: Input: default@date_dim@d_date_sk=2416945 POSTHOOK: Input: default@date_dim@d_date_sk=2416946 POSTHOOK: Input: default@date_dim@d_date_sk=2416947 +POSTHOOK: Output: default@date_dim +POSTHOOK: Output: default@date_dim@d_date_sk=2416945 +POSTHOOK: Output: default@date_dim@d_date_sk=2416946 +POSTHOOK: Output: default@date_dim@d_date_sk=2416947 #### A masked pattern was here #### PREHOOK: query: explain select count(*) from date_dim where d_date > date "1900-01-02" and d_date_sk= 2416945 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out b/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out index 85d7dc4..38854ae 100644 --- a/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out @@ -77,10 +77,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted 
statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 @@ -193,12 +195,18 @@ PREHOOK: Input: statsdb1@testpart1 PREHOOK: Input: statsdb1@testpart1@part=part1 PREHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testpart1 +PREHOOK: Output: statsdb1@testpart1@part=part1 +PREHOOK: Output: statsdb1@testpart1@part=part2 POSTHOOK: query: analyze table testpart1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testpart1 POSTHOOK: Input: statsdb1@testpart1@part=part1 POSTHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testpart1 +POSTHOOK: Output: statsdb1@testpart1@part=part1 +POSTHOOK: Output: statsdb1@testpart1@part=part2 PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -543,10 +551,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 @@ -659,12 +669,18 @@ PREHOOK: Input: statsdb1@testpart1 PREHOOK: Input: statsdb1@testpart1@part=part1 PREHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testpart1 +PREHOOK: Output: statsdb1@testpart1@part=part1 +PREHOOK: Output: statsdb1@testpart1@part=part2 POSTHOOK: query: analyze table testpart1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testpart1 POSTHOOK: Input: statsdb1@testpart1@part=part1 POSTHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testpart1 +POSTHOOK: Output: statsdb1@testpart1@part=part1 +POSTHOOK: Output: statsdb1@testpart1@part=part2 PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 diff --git a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out index 80ccddd..bc76dd3 100644 --- a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out +++ b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out @@ -189,10 +189,12 @@ POSTHOOK: Input: default@calendar PREHOOK: query: analyze table calendar compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE @@ -432,10 +434,12 @@ POSTHOOK: Input: default@calendar PREHOOK: query: analyze table calendar compute statistics for columns year PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table 
calendar compute statistics for columns year POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE @@ -565,10 +569,12 @@ POSTHOOK: Input: default@calendar PREHOOK: query: analyze table calendar compute statistics for columns month PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns month POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out new file mode 100644 index 0000000..7727193 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out @@ -0,0 +1,989 @@ +PREHOOK: query: DROP TABLE IF EXISTS s +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS s +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@s +POSTHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@s +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@s +PREHOOK: query: desc formatted s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@s +POSTHOOK: query: desc formatted s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@s +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table s compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table s compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Statistics Aggregation Key Prefix: default.s/ + GatherStats: true + Select Operator + expressions: 
key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: s + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.s + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct s { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.s + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct s { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.s + name: default.s + Truncated Path -> Alias: + /s [s] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.s/ + + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.s + Is Table Level Stats: true + +PREHOOK: query: analyze table s compute statistics for 
columns +PREHOOK: type: QUERY +PREHOOK: Input: default@s +PREHOOK: Output: default@s +#### A masked pattern was here #### +POSTHOOK: query: analyze table s compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s +POSTHOOK: Output: default@s +#### A masked pattern was here #### +PREHOOK: query: desc formatted s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@s +POSTHOOK: query: desc formatted s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@s +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DROP TABLE IF EXISTS spart +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS spart +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@spart +POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spart +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table spart compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table spart compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: spart + Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL + Statistics Aggregation Key Prefix: default.spart/ + GatherStats: true + Select Operator + expressions: ds (type: string), hr (type: string), key (type: string), value (type: string) + outputColumnNames: ds, hr, key, value + Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.spart + name: default.spart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart + Truncated Path -> Alias: + /spart/ds=2008-04-08/hr=11 [spart] + /spart/ds=2008-04-08/hr=12 [spart] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.spart/ + + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.spart + Is Table Level Stats: false + +PREHOOK: query: analyze table spart compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@spart +PREHOOK: Input: 
default@spart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@spart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@spart +PREHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table spart compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spart +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DROP TABLE IF EXISTS spart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@spart +PREHOOK: Output: default@spart +POSTHOOK: query: DROP TABLE IF EXISTS spart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@spart +POSTHOOK: Output: default@spart +PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@spart +POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spart +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table spart partition(hr="11") compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table spart partition(hr="11") compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator 
Tree:
+                TableScan
+                  alias: spart
+                  Statistics: Num rows: 29 Data size: 11148 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics Aggregation Key Prefix: default.spart/
+                  GatherStats: true
+                  Select Operator
+                    expressions: ds (type: string), key (type: string), value (type: string)
+                    outputColumnNames: ds, key, value
+                    Statistics: Num rows: 29 Data size: 11148 Basic stats: COMPLETE Column stats: PARTIAL
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), '11' (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), '11' (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), '11' (type: string)
+                        Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: true
+            Execution mode: llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.spart
+                    numFiles 1
+                    numRows 0
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 0
+                    serialization.ddl struct spart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.spart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct spart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.spart
+                  name: default.spart
+            Truncated Path -> Alias:
+              /spart/ds=2008-04-08/hr=11 [spart]
+        Reducer 2
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), '11' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+      Stats Aggregation Key Prefix: default.spart/
+
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.spart
+          Is Table Level Stats: false
+
+PREHOOK: query: analyze table spart partition(hr="11") compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spart
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Output: default@spart
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table spart partition(hr="11") compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spart
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
index 3e28e58..6c5babb 100644
--- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
+++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
@@ -230,6 +230,12 @@ PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
 PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA
 PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA
 PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB
+PREHOOK: Output: default@partcolstats
+PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA
+PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns
 POSTHOOK: type: QUERY
@@ -239,6 +245,12 @@ POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
 POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA
 POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA
 POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB
+POSTHOOK: Output: default@partcolstats
+POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA
+POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB
 #### A masked pattern was here ####
 PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
index ba14835..98ba6af 100644
--- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
+++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
@@ -75,18 +75,22 @@ amount decimal(10,3)
 PREHOOK: query: analyze table testdeci2 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@testdeci2
+PREHOOK: Output: default@testdeci2
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table testdeci2 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@testdeci2
+POSTHOOK: Output: default@testdeci2
 #### A masked pattern was here ####
 PREHOOK: query: analyze table testdeci2 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@testdeci2
+PREHOOK: Output: default@testdeci2
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table testdeci2 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@testdeci2
+POSTHOOK: Output: default@testdeci2
 #### A masked pattern was here ####
 PREHOOK: query: explain select s.id,
diff --git a/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out b/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out
index c6ab40d..98b2193 100644
--- a/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out
@@ -42,12 +42,18 @@ PREHOOK: Input: partstatsdb1@testtable
 PREHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12
 PREHOOK: Input: partstatsdb1@testtable@part1=p21/part2=P22
 #### A masked pattern was here ####
+PREHOOK: Output: partstatsdb1@testtable
+PREHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12
+PREHOOK: Output: partstatsdb1@testtable@part1=p21/part2=P22
 POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: partstatsdb1@testtable
 POSTHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12
 POSTHOOK: Input: partstatsdb1@testtable@part1=p21/part2=P22
 #### A masked pattern was here ####
+POSTHOOK: Output: partstatsdb1@testtable
+POSTHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12
+POSTHOOK: Output: partstatsdb1@testtable@part1=p21/part2=P22
 PREHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key
 PREHOOK: type: QUERY
 PREHOOK: Input: partstatsdb1@testtable
@@ -110,6 +116,11 @@ PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12
 PREHOOK: Input: partstatsdb1@testtable1@part1=p21/part2=P22
 PREHOOK: Input: partstatsdb1@testtable1@part1=p31/part2=P32
 #### A masked pattern was here ####
+PREHOOK: Output: partstatsdb1@testtable1
+PREHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P11
+PREHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12
+PREHOOK: Output: partstatsdb1@testtable1@part1=p21/part2=P22
+PREHOOK: Output: partstatsdb1@testtable1@part1=p31/part2=P32
 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: partstatsdb1@testtable1
@@ -118,6 +129,11 @@ POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12
 POSTHOOK: Input: partstatsdb1@testtable1@part1=p21/part2=P22
 POSTHOOK: Input: partstatsdb1@testtable1@part1=p31/part2=P32
 #### A masked pattern was here ####
+POSTHOOK: Output: partstatsdb1@testtable1
+POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P11
+POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12
+POSTHOOK: Output: partstatsdb1@testtable1@part1=p21/part2=P22
+POSTHOOK: Output: partstatsdb1@testtable1@part1=p31/part2=P32
 PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key
 PREHOOK: type: QUERY
 PREHOOK: Input: partstatsdb1@testtable1
@@ -172,12 +188,18 @@ PREHOOK: Input: partstatsdb1@testtable2
 PREHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12
 PREHOOK: Input: partstatsdb1@testtable2@part1=p21/part2=P22
 #### A masked pattern was here ####
+PREHOOK: Output: partstatsdb1@testtable2
+PREHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12
+PREHOOK: Output: partstatsdb1@testtable2@part1=p21/part2=P22
 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: partstatsdb1@testtable2
 POSTHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12
 POSTHOOK: Input: partstatsdb1@testtable2@part1=p21/part2=P22
 #### A masked pattern was here ####
+POSTHOOK: Output: partstatsdb1@testtable2
+POSTHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12
+POSTHOOK: Output: partstatsdb1@testtable2@part1=p21/part2=P22
 PREHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key
 PREHOOK: type: QUERY
 PREHOOK: Input: partstatsdb1@testtable2
@@ -288,12 +310,18 @@ PREHOOK: Input: partstatsdb2@testtable
 PREHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12
 PREHOOK: Input: partstatsdb2@testtable@part1=p21/part2=P22
 #### A masked pattern was here ####
+PREHOOK: Output: partstatsdb2@testtable
+PREHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12
+PREHOOK: Output: partstatsdb2@testtable@part1=p21/part2=P22
 POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: partstatsdb2@testtable
 POSTHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12
 POSTHOOK: Input: partstatsdb2@testtable@part1=p21/part2=P22
 #### A masked pattern was here ####
+POSTHOOK: Output: partstatsdb2@testtable
+POSTHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12
+POSTHOOK: Output: partstatsdb2@testtable@part1=p21/part2=P22
 PREHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key
 PREHOOK: type: QUERY
 PREHOOK: Input: partstatsdb2@testtable
@@ -356,6 +384,11 @@ PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12
 PREHOOK: Input: partstatsdb2@testtable1@part1=p21/part2=P22
 PREHOOK: Input: partstatsdb2@testtable1@part1=p31/part2=P32
 #### A masked pattern was here ####
+PREHOOK: Output: partstatsdb2@testtable1
+PREHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P11
+PREHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12
+PREHOOK: Output: partstatsdb2@testtable1@part1=p21/part2=P22
+PREHOOK: Output: partstatsdb2@testtable1@part1=p31/part2=P32
 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: partstatsdb2@testtable1
@@ -364,6 +397,11 @@ POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12
 POSTHOOK: Input: partstatsdb2@testtable1@part1=p21/part2=P22
 POSTHOOK: Input: partstatsdb2@testtable1@part1=p31/part2=P32
 #### A masked pattern was here ####
+POSTHOOK: Output: partstatsdb2@testtable1
+POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P11
+POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12
+POSTHOOK: Output: partstatsdb2@testtable1@part1=p21/part2=P22
+POSTHOOK: Output: partstatsdb2@testtable1@part1=p31/part2=P32
 PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key
 PREHOOK: type: QUERY
 PREHOOK: Input: partstatsdb2@testtable1
@@ -418,12 +456,18 @@ PREHOOK: Input: partstatsdb2@testtable2
 PREHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12
 PREHOOK: Input: partstatsdb2@testtable2@part1=p21/part2=P22
 #### A masked pattern was here ####
+PREHOOK: Output: partstatsdb2@testtable2
+PREHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12
+PREHOOK: Output: partstatsdb2@testtable2@part1=p21/part2=P22
 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: partstatsdb2@testtable2
 POSTHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12
 POSTHOOK: Input: partstatsdb2@testtable2@part1=p21/part2=P22
 #### A masked pattern was here ####
+POSTHOOK: Output: partstatsdb2@testtable2
+POSTHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12
+POSTHOOK: Output: partstatsdb2@testtable2@part1=p21/part2=P22
 PREHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key
 PREHOOK: type: QUERY
 PREHOOK: Input: partstatsdb2@testtable2
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index e898111..087a531 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -127,10 +127,12 @@ POSTHOOK: Output: default@ss
 PREHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ss
+PREHOOK: Output: default@ss
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ss
+POSTHOOK: Output: default@ss
 #### A masked pattern was here ####
 PREHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS
 PREHOOK: type: QUERY
@@ -143,10 +145,12 @@ POSTHOOK: Output: default@sr
 PREHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@sr
+PREHOOK: Output: default@sr
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@sr
+POSTHOOK: Output: default@sr
 #### A masked pattern was here ####
 PREHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS
 PREHOOK: type: QUERY
@@ -159,10 +163,12 @@ POSTHOOK: Output: default@cs
 PREHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@cs
+PREHOOK: Output: default@cs
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@cs
+POSTHOOK: Output: default@cs
 #### A masked pattern was here ####
 PREHOOK: query: EXPLAIN
 SELECT x.key, z.value, y.value
diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
index f6921f1..c8b9983 100644
--- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
@@ -93,7 +93,8 @@ POSTHOOK: query: explain analyze table llap_stats partition (cint) compute stati
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -154,6 +155,9 @@ STAGE PLANS:
       Column Types: tinyint, smallint
       Table: default.llap_stats
 
+  Stage: Stage-3
+    Stats-Aggr Operator
+
 PREHOOK: query: analyze table llap_stats partition (cint) compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@llap_stats
@@ -167,6 +171,17 @@ PREHOOK: Input: default@llap_stats@cint=-9566
 PREHOOK: Input: default@llap_stats@cint=15007
 PREHOOK: Input: default@llap_stats@cint=4963
 PREHOOK: Input: default@llap_stats@cint=7021
+PREHOOK: Output: default@llap_stats
+PREHOOK: Output: default@llap_stats@cint=-13326
+PREHOOK: Output: default@llap_stats@cint=-15431
+PREHOOK: Output: default@llap_stats@cint=-15549
+PREHOOK: Output: default@llap_stats@cint=-15813
+PREHOOK: Output: default@llap_stats@cint=-4213
+PREHOOK: Output: default@llap_stats@cint=-7824
+PREHOOK: Output: default@llap_stats@cint=-9566
+PREHOOK: Output: default@llap_stats@cint=15007
+PREHOOK: Output: default@llap_stats@cint=4963
+PREHOOK: Output: default@llap_stats@cint=7021
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table llap_stats partition (cint) compute statistics for columns
 POSTHOOK: type: QUERY
@@ -181,6 +196,17 @@ POSTHOOK: Input: default@llap_stats@cint=-9566
 POSTHOOK: Input: default@llap_stats@cint=15007
 POSTHOOK: Input: default@llap_stats@cint=4963
 POSTHOOK: Input: default@llap_stats@cint=7021
+POSTHOOK: Output: default@llap_stats
+POSTHOOK: Output: default@llap_stats@cint=-13326
+POSTHOOK: Output: default@llap_stats@cint=-15431
+POSTHOOK: Output: default@llap_stats@cint=-15549
+POSTHOOK: Output: default@llap_stats@cint=-15813
+POSTHOOK: Output: default@llap_stats@cint=-4213
+POSTHOOK: Output: default@llap_stats@cint=-7824
+POSTHOOK: Output: default@llap_stats@cint=-9566
+POSTHOOK: Output: default@llap_stats@cint=15007
+POSTHOOK: Output: default@llap_stats@cint=4963
+POSTHOOK: Output: default@llap_stats@cint=7021
 #### A masked pattern was here ####
 PREHOOK: query: DROP TABLE llap_stats
 PREHOOK: type: DROPTABLE
diff --git a/ql/src/test/results/clientpositive/llap/llapdecider.q.out b/ql/src/test/results/clientpositive/llap/llapdecider.q.out
index d514f42..69312cd 100644
--- a/ql/src/test/results/clientpositive/llap/llapdecider.q.out
+++ b/ql/src/test/results/clientpositive/llap/llapdecider.q.out
@@ -230,10 +230,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table src_orc compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_orc
+PREHOOK: Output: default@src_orc
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_orc compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_orc
+POSTHOOK: Output: default@src_orc
 #### A masked pattern was here ####
 PREHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
index c8190bd..6385619 100644
--- a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
@@ -427,10 +427,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_tbl
+PREHOOK: Output: default@stats_tbl
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl
+POSTHOOK: Output: default@stats_tbl
 #### A masked pattern was here ####
 PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out
index be8ca4a..3a83fb9 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out
@@ -37,12 +37,18 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@partitioned1
 PREHOOK: Input: default@partitioned1@part=1
 PREHOOK: Input: default@partitioned1@part=2
+PREHOOK: Output: default@partitioned1
+PREHOOK: Output: default@partitioned1@part=1
+PREHOOK: Output: default@partitioned1@part=2
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table partitioned1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@partitioned1
 POSTHOOK: Input: default@partitioned1@part=1
 POSTHOOK: Input: default@partitioned1@part=2
+POSTHOOK: Output: default@partitioned1
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Output: default@partitioned1@part=2
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted partitioned1
 PREHOOK: type: DESCTABLE
@@ -231,12 +237,18 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@partitioned1
 PREHOOK: Input: default@partitioned1@part=1
 PREHOOK: Input: default@partitioned1@part=2
+PREHOOK: Output: default@partitioned1
+PREHOOK: Output: default@partitioned1@part=1
+PREHOOK: Output: default@partitioned1@part=2
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table partitioned1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@partitioned1
 POSTHOOK: Input: default@partitioned1@part=1
 POSTHOOK: Input: default@partitioned1@part=2
+POSTHOOK: Output: default@partitioned1
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Output: default@partitioned1@part=2
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted partitioned1
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
index d01b373..f6f3aac 100644
--- a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out
@@ -162,11 +162,15 @@ PREHOOK: query: analyze table `c/b/o_t1` compute statistics for columns key, val
 PREHOOK: type: QUERY
 PREHOOK: Input: default@c/b/o_t1
 PREHOOK: Input: default@c/b/o_t1@dt=2014
+PREHOOK: Output: default@c/b/o_t1
+PREHOOK: Output: default@c/b/o_t1@dt=2014
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table `c/b/o_t1` compute statistics for columns key, value, c_int, c_float, c_boolean
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@c/b/o_t1
 POSTHOOK: Input: default@c/b/o_t1@dt=2014
+POSTHOOK: Output: default@c/b/o_t1
+POSTHOOK: Output: default@c/b/o_t1@dt=2014
 #### A masked pattern was here ####
 PREHOOK: query: analyze table `//cbo_t2` partition (dt) compute statistics
 PREHOOK: type: QUERY
@@ -184,11 +188,15 @@ PREHOOK: query: analyze table `//cbo_t2` compute statistics for columns key, val
 PREHOOK: type: QUERY
 PREHOOK: Input: default@//cbo_t2
 PREHOOK: Input: default@//cbo_t2@dt=2014
+PREHOOK: Output: default@//cbo_t2
+PREHOOK: Output: default@//cbo_t2@dt=2014
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table `//cbo_t2` compute statistics for columns key, value, c_int, c_float, c_boolean
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@//cbo_t2
 POSTHOOK: Input: default@//cbo_t2@dt=2014
+POSTHOOK: Output: default@//cbo_t2
+POSTHOOK: Output: default@//cbo_t2@dt=2014
 #### A masked pattern was here ####
 PREHOOK: query: analyze table `cbo_/t3////` compute statistics
 PREHOOK: type: QUERY
@@ -201,10 +209,12 @@ POSTHOOK: Output: default@cbo_/t3////
 PREHOOK: query: analyze table `cbo_/t3////` compute statistics for columns key, value, c_int, c_float, c_boolean
 PREHOOK: type: QUERY
 PREHOOK: Input: default@cbo_/t3////
+PREHOOK: Output: default@cbo_/t3////
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table `cbo_/t3////` compute statistics for columns key, value, c_int, c_float, c_boolean
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@cbo_/t3////
+POSTHOOK: Output: default@cbo_/t3////
 #### A masked pattern was here ####
 PREHOOK: query: analyze table `src/_/cbo` compute statistics
 PREHOOK: type: QUERY
@@ -217,10 +227,12 @@ POSTHOOK: Output: default@src/_/cbo
 PREHOOK: query: analyze table `src/_/cbo` compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src/_/cbo
+PREHOOK: Output: default@src/_/cbo
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table `src/_/cbo` compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src/_/cbo
+POSTHOOK: Output: default@src/_/cbo
 #### A masked pattern was here ####
 PREHOOK: query: analyze table `p/a/r/t` compute statistics
 PREHOOK: type: QUERY
@@ -233,10 +245,12 @@ POSTHOOK: Output: default@p/a/r/t
 PREHOOK: query: analyze table `p/a/r/t` compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@p/a/r/t
+PREHOOK: Output: default@p/a/r/t
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table `p/a/r/t` compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@p/a/r/t
+POSTHOOK: Output: default@p/a/r/t
 #### A masked pattern was here ####
 PREHOOK: query: analyze table `line/item` compute statistics
 PREHOOK: type: QUERY
@@ -249,10 +263,12 @@ POSTHOOK: Output: default@line/item
 PREHOOK: query: analyze table `line/item` compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@line/item
+PREHOOK: Output: default@line/item
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table `line/item` compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@line/item
+POSTHOOK: Output: default@line/item
 #### A masked pattern was here ####
 PREHOOK: query: select key, (c_int+1)+2 as x, sum(c_int) from `c/b/o_t1` group by c_float, `c/b/o_t1`.c_int, key
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
index c905ceb..86163fe 100644
--- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out
@@ -189,10 +189,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_null
+PREHOOK: Output: default@stats_null
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_null
+POSTHOOK: Output: default@stats_null
 #### A masked pattern was here ####
 PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d
 PREHOOK: type: QUERY
@@ -384,12 +386,18 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_null_part
 PREHOOK: Input: default@stats_null_part@dt=1
 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+PREHOOK: Output: default@stats_null_part
+PREHOOK: Output: default@stats_null_part@dt=1
+PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_null_part compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_null_part
 POSTHOOK: Input: default@stats_null_part@dt=1
 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Output: default@stats_null_part
+POSTHOOK: Output: default@stats_null_part@dt=1
+POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
 #### A masked pattern was here ####
 PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out
index 18b9aa5..67fef54 100644
--- a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out
@@ -103,18 +103,22 @@ POSTHOOK: Lineage: inputtbl3.val SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(
 PREHOOK: query: analyze table inputTbl1 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@inputtbl1
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table inputTbl1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@inputtbl1
 #### A masked pattern was here ####
 PREHOOK: query: analyze table inputTbl3 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@inputtbl3
+PREHOOK: Output: default@inputtbl3
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table inputTbl3 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@inputtbl3
+POSTHOOK: Output: default@inputtbl3
 #### A masked pattern was here ####
 PREHOOK: query: explain
 SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
@@ -282,10 +286,12 @@ POSTHOOK: Input: default@inputtbl3
 PREHOOK: query: analyze table inputTbl2 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@inputtbl2
+PREHOOK: Output: default@inputtbl2
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table inputTbl2 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Output: default@inputtbl2
 #### A masked pattern was here ####
 PREHOOK: query: explain
 SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
index 072399e..e687c5b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
@@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
index d79c71b..1dd24b0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out
@@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
index dbbfd34..f8d1ec2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
@@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out
index ffce9e6..a55250b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out
@@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_b
+PREHOOK: Output: default@small_alltypesorc_b
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_b
+POSTHOOK: Output: default@small_alltypesorc_b
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_b
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out
index 4f25253..680ee42 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out
@@ -27,10 +27,12 @@ POSTHOOK: Output: default@sorted_mod_4
 PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@sorted_mod_4
+PREHOOK: Output: default@sorted_mod_4
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@sorted_mod_4
+POSTHOOK: Output: default@sorted_mod_4
 #### A masked pattern was here ####
 PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from
 alltypesorc limit 100
@@ -57,10 +59,12 @@ POSTHOOK: Output: default@small_table
 PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_table
+PREHOOK: Output: default@small_table
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
+POSTHOOK: Output: default@small_table
 #### A masked pattern was here ####
 PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
@@ -267,10 +271,12 @@ POSTHOOK: Output: default@mod_8_mod_4
 PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Output: default@mod_8_mod_4
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Output: default@mod_8_mod_4
 #### A masked pattern was here ####
 PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from
 alltypesorc limit 100
@@ -297,10 +303,12 @@ POSTHOOK: Output: default@small_table2
 PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_table2
+PREHOOK: Output: default@small_table2
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table2
+POSTHOOK: Output: default@small_table2
 #### A masked pattern was here ####
 PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
index eb0f405..5f4735f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
@@ -1632,10 +1632,12 @@ POSTHOOK: Output: default@dsrv_small
 PREHOOK: query: analyze table dsrv_small compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dsrv_small
+PREHOOK: Output: default@dsrv_small
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table dsrv_small compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dsrv_small
+POSTHOOK: Output: default@dsrv_small
 #### A masked pattern was here ####
 PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index b369e7c..d9fd706 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -73,18 +73,22 @@ POSTHOOK: Output: default@dsrv2_small
 PREHOOK: query: analyze table dsrv2_big compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dsrv2_big
+PREHOOK: Output: default@dsrv2_big
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table dsrv2_big compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Output: default@dsrv2_big
 #### A masked pattern was here ####
 PREHOOK: query: analyze table dsrv2_small compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dsrv2_small
+PREHOOK: Output: default@dsrv2_small
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table dsrv2_small compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dsrv2_small
+POSTHOOK: Output: default@dsrv2_small
 #### A masked pattern was here ####
 PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 57b59dd..887f6d5 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -387,10 +387,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_tbl
+PREHOOK: Output: default@stats_tbl
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl
+POSTHOOK: Output: default@stats_tbl
 #### A masked pattern was here ####
 PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out
index 59b52b0..4cfab18 100644
--- a/ql/src/test/results/clientpositive/partial_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out
@@ -12,7 +12,8 @@ POSTHOOK: query: explain analyze table t1 compute statistics for columns
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-0, Stage-2
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -55,13 +56,18 @@ STAGE PLANS:
       Column Types: int, string
       Table: default.t1
 
+  Stage: Stage-2
+    Stats-Aggr Operator
+
 PREHOOK: query: analyze table t1 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table t1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted t1 value
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index c422a3d..7794e7f 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -70,6 +70,10 @@ PREHOOK: Input: default@pcs_t1
 PREHOOK: Input: default@pcs_t1@ds=2000-04-08
 PREHOOK: Input: default@pcs_t1@ds=2000-04-09
 PREHOOK: Input: default@pcs_t1@ds=2000-04-10
+PREHOOK: Output: default@pcs_t1
+PREHOOK: Output: default@pcs_t1@ds=2000-04-08
+PREHOOK: Output: default@pcs_t1@ds=2000-04-09
+PREHOOK: Output: default@pcs_t1@ds=2000-04-10
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics for columns
 POSTHOOK: type: QUERY
@@ -77,6 +81,10 @@ POSTHOOK: Input: default@pcs_t1
 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08
 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09
 POSTHOOK: Input: default@pcs_t1@ds=2000-04-10
+POSTHOOK: Output: default@pcs_t1
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-09
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-10
 #### A masked pattern was here ####
 PREHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
index 567e6b2..b9d1351 100644
--- a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
+++ b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
@@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name
 PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
 PREHOOK: type: QUERY
 PREHOOK: Input: default@loc_orc
+PREHOOK: Output: default@loc_orc
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@loc_orc
+POSTHOOK: Output: default@loc_orc
 #### A masked pattern was here ####
 PREHOOK: query: explain select * from loc_orc where locid < 30
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
index c025cfa..2f13760 100644
--- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
@@ -47,10 +47,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+PREHOOK: Output: statsdb1@testtable1
 POSTHOOK: query: analyze table testtable1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+POSTHOOK: Output: statsdb1@testtable1
 PREHOOK: query: describe formatted statsdb1.testtable1 col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testtable1
@@ -187,10 +189,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+PREHOOK: Output: statsdb1@testtable1
 POSTHOOK: query: analyze table testtable1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+POSTHOOK: Output: statsdb1@testtable1
 PREHOOK: query: describe formatted statsdb1.testtable1 col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testtable1
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 31ccc5c..a93ae48 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -97,26 +97,32 @@ POSTHOOK: Output: default@loc
 PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid
 PREHOOK: type: QUERY
 PREHOOK: Input: default@emp
+PREHOOK: Output: default@emp
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@emp
+POSTHOOK: Output: default@emp
 #### A masked pattern was here ####
 PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dept
+PREHOOK: Output: default@dept
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dept
+POSTHOOK: Output: default@dept
 #### A masked pattern was here ####
 PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year
 PREHOOK: type: QUERY
 PREHOOK: Input: default@loc
+PREHOOK: Output: default@loc
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@loc
+POSTHOOK: Output: default@loc
 #### A masked pattern was here ####
 PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
index cebc342..1fd98fa 100644
--- a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
+++ b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
@@ -25,10 +25,12 @@ POSTHOOK: Output: default@dec
 PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dec
+PREHOOK: Output: default@dec
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dec
+POSTHOOK: Output: default@dec
 #### A masked pattern was here ####
 PREHOOK: query: DESC FORMATTED dec value
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
index c81240d..1b9fd14 100644
--- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
@@ -369,21 +369,21 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesorc
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: cdouble (type: double)
                 outputColumnNames: cdouble
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   keys: cdouble (type: double)
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: double)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: double)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.3
         Reducer 2
             Reduce Operator Tree:
@@ -391,13 +391,13 @@
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -458,40 +458,40 @@ STAGE PLANS:
         Map Operator Tree:
            TableScan
              alias: alltypesorc
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint), cdouble (type: double)
                outputColumnNames: ctinyint, cdouble
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  keys: ctinyint (type: tinyint), cdouble (type: double)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: tinyint), _col1 (type: double)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: tinyint)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
        Reducer 2
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(_col1)
                  keys: _col0 (type: tinyint)
                  mode: complete
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                  Limit
                    Number of rows: 20
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -552,40 +552,40 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: alltypesorc
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint), cdouble (type: double)
                outputColumnNames: ctinyint, cdouble
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  keys: ctinyint (type: tinyint), cdouble (type: double)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: tinyint), _col1 (type: double)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: tinyint)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
        Reducer 2
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(_col1)
                  keys: _col0 (type: tinyint)
                  mode: complete
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                  Limit
                    Number of rows: 20
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -646,22 +646,22 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: alltypesorc
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
                outputColumnNames: ctinyint, cstring1, cstring2
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2)
                  keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string)
                    sort order: +++
                    Map-reduce partition columns: _col0 (type: tinyint)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                    TopN Hash Memory Usage: 0.3
        Reducer 2
            Reduce Operator Tree:
@@ -670,13 +670,13 @@
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index 543d0ef..5d2eaa0 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -411,10 +411,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_tbl
+PREHOOK: Output: default@stats_tbl
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_tbl
+POSTHOOK: Output: default@stats_tbl
 #### A masked pattern was here ####
 PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
index 359eea3..c439958 100644
--- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out
@@ -181,10 +181,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_null
+PREHOOK: Output: default@stats_null
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_null
+POSTHOOK: Output: default@stats_null
 #### A masked pattern was here ####
 PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d
 PREHOOK: type: QUERY
@@ -376,12 +378,18 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_null_part
 PREHOOK: Input: default@stats_null_part@dt=1
 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+PREHOOK: Output: default@stats_null_part
+PREHOOK: Output: default@stats_null_part@dt=1
+PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_null_part compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_null_part
 POSTHOOK: Input: default@stats_null_part@dt=1
 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Output: default@stats_null_part
+POSTHOOK: Output: default@stats_null_part@dt=1
+POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
 #### A masked pattern was here ####
 PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
index b49462a..00f5292 100644
--- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
@@ -23,7 +23,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: alltypesorc
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -33,7 +33,7 @@
                    native: true
                    predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
                predicate: (ctinyint > 0) (type: boolean)
-                Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3
@@ -42,19 +42,19 @@
                      native: true
                      projectedOutputColumns: [13, 6, 2, 16]
                      selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string
-                  Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                  Limit
                    Number of rows: 10
                    Limit Vectorization:
                        className: VectorLimitOperator
                        native: true
-                    Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
                      File Sink Vectorization:
                          className: VectorFileSinkOperator
                          native: false
-                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -140,7 +140,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: alltypesorc
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
diff --git a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
index 91af229..f171405 100644
--- a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
@@ -34,11 +34,11 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: cd
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cint (type: int)
                outputColumnNames: _col0
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Spark HashTable Sink Operator
                  keys:
                    0 _col1 (type: int)
@@ -58,11 +58,11 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: hd
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint)
                outputColumnNames: _col0
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Spark HashTable Sink Operator
                  keys:
                    0 _col0 (type: tinyint)
@@ -89,11 +89,11 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: c
-              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ctinyint (type: tinyint), cint (type: int)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Left Outer Join0 to 1
@@ -103,7 +103,7 @@ STAGE PLANS:
                  outputColumnNames: _col0
                  input vertices:
                    1 Map 3
-                  Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                  Map Join Operator
                    condition map:
                         Left Outer Join0 to 1
@@ -112,7 +112,7 @@
                      1 _col0 (type: tinyint)
                    input vertices:
                      1 Map 4
-                    Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count()
                      mode: hash
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
index 169b0f8..82a11d2 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
@@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
@@ -244,7 +246,7 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: cd
-              Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
                  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -255,7 +257,7 @@
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
-                Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                Spark HashTable Sink Operator
                  Spark Hash Table Sink Vectorization:
                      className: VectorSparkHashTableSinkOperator
@@ -288,7 +290,7 @@
        Map Operator Tree:
            TableScan
              alias: c
-              Statistics: Num rows: 15 Data size: 4003 Basic stats: 
COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -299,7 +301,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -319,13 +321,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -417,7 +419,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -428,7 +430,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -461,7 +463,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -472,7 +474,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -490,13 +492,13 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 2 - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -679,7 +681,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -690,7 +692,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -718,7 +720,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -729,7 +731,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -764,7 +766,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -775,7 +777,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -793,7 +795,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -811,7 +813,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 17 Data size: 4843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 4214 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index d56c4ab..8f450fb 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY @@ -260,7 +262,7 
@@ STAGE PLANS: Map Operator Tree: TableScan alias: cd - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -271,7 +273,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -299,7 +301,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -310,7 +312,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -345,7 +347,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -356,7 +358,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3] - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -374,7 +376,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4874 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -392,7 +394,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 24 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5361 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col1) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index dbbfd34..f8d1ec2 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a 
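A note on the pattern visible here and in the other analyze commands above: each ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS now lists the analyzed table, and for partitioned tables each partition, under "PREHOOK: Output:" / "POSTHOOK: Output:" alongside the existing inputs. That is the expected consequence of this patch: column stats collection now also persists basic table stats, so the analyzed objects become write entities (hence the new WriteEntity import in ProcessAnalyzeTable). Below is a minimal sketch of how such outputs could be registered; the class name, method name, and lock type are illustrative assumptions, not the literal patch:

    // Hedged sketch: mark the analyzed table, and each analyzed partition, as a
    // write entity so the pre/post execution hooks report them as outputs.
    import java.util.List;
    import java.util.Set;
    import org.apache.hadoop.hive.ql.hooks.WriteEntity;
    import org.apache.hadoop.hive.ql.metadata.Partition;
    import org.apache.hadoop.hive.ql.metadata.Table;

    public final class AnalyzeOutputsSketch {
      public static void addStatsOutputs(Set<WriteEntity> outputs, Table table,
          List<Partition> partitions) {
        // The table itself is always written (table-level stats); DDL_NO_LOCK is
        // an assumed lock type here.
        outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
        if (partitions != null) {
          // Partition-level analyze also writes stats into each partition, which
          // matches the three Output lines for stats_null_part above.
          for (Partition p : partitions) {
            outputs.add(new WriteEntity(p, WriteEntity.WriteType.DDL_NO_LOCK));
          }
        }
      }
    }
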
#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index ffce9e6..a55250b 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b +PREHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b +POSTHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 4f25253..680ee42 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -27,10 +27,12 @@ POSTHOOK: Output: default@sorted_mod_4 PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@sorted_mod_4 +PREHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@sorted_mod_4 +POSTHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 @@ -57,10 +59,12 @@ POSTHOOK: Output: default@small_table PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table +PREHOOK: Output: default@small_table #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table +POSTHOOK: Output: default@small_table #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* @@ -267,10 +271,12 @@ POSTHOOK: Output: default@mod_8_mod_4 PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 +PREHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 +POSTHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 @@ -297,10 +303,12 @@ POSTHOOK: Output: default@small_table2 PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table2 +PREHOOK: Output: default@small_table2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 +POSTHOOK: Output: default@small_table2 #### 
A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 714c21f..947d50d 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -45,7 +45,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() Group By Vectorization: @@ -190,7 +190,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -201,7 +201,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) Group By Vectorization: @@ -355,11 +355,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) mode: hash @@ -488,7 +488,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -499,7 +499,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() Group By Vectorization: @@ -644,7 +644,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -655,7 +655,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cbigint) Group By Vectorization: @@ -809,11 +809,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) mode: hash @@ -942,7 +942,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -953,7 +953,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [4] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() Group By Vectorization: @@ -1098,7 +1098,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1109,7 +1109,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [4] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) Group By Vectorization: @@ -1263,11 +1263,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) mode: hash @@ -1434,7 +1434,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1444,7 +1444,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint @@ -1452,7 +1452,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3, 4, 0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) Group By Vectorization: @@ -1591,14 +1591,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1631,7 +1631,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1652,7 +1652,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp 
ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30399,22 +30399,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30451,7 +30451,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30472,7 +30472,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30513,22 +30513,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), 
ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30565,7 +30565,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30586,7 +30586,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30627,22 +30627,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30679,7 +30679,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, 
string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30700,7 +30700,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30742,24 +30742,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -30783,7 +30783,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30804,7 +30804,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30824,16 +30824,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -30844,13 +30844,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out index a6ef031..00c9328 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -94,7 +94,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 @@ -102,7 +102,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10, 0, 8, 4, 6] - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) Group By Vectorization: @@ -116,12 +116,12 @@ STAGE PLANS: keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash 
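Another recurring pattern worth decoding: every changed "Data size" in these plans follows from alltypesorc's rawDataSize being corrected from 0 to 2641964 for its 12288 rows (see the table properties in vectorization_0.q.out above). Each downstream operator's size estimate is then just the scan's data size scaled to that operator's estimated row count with long division; that single ratio reproduces the updated golden numbers, e.g. 6144 rows -> 1320982, 3072 -> 660491, 2730 -> 586959, and the 20-row limit -> 4300. A worked sketch of that arithmetic follows; the class and method names are illustrative, and this mirrors the observable numbers rather than quoting Hive's stats-annotation code:

    // Hedged sketch: proportional data-size estimate, dataSize * estRows / scanRows.
    public final class DataSizeScalingSketch {
      static long scaledDataSize(long scanDataSize, long scanRows, long estRows) {
        return scanDataSize * estRows / scanRows;  // long division, as the plans show
      }

      public static void main(String[] args) {
        long size = 2641964L, rows = 12288L;  // alltypesorc after the rawDataSize fix
        System.out.println(scaledDataSize(size, rows, 6144)); // 1320982
        System.out.println(scaledDataSize(size, rows, 3072)); // 660491
        System.out.println(scaledDataSize(size, rows, 2730)); // 586959
        System.out.println(scaledDataSize(size, rows, 20));   // 4300
      }
    }

Join estimates (for example 13516 -> 2906160 in vector_left_outer_join.q.out) come out of the join sizing rule instead, so they do not follow this simple ratio.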
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -144,15 +144,15 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -171,19 +171,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,7 +389,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -399,7 +399,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 @@ -407,7 +407,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10, 0, 8, 4, 6] - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) Group By Vectorization: @@ -421,12 +421,12 @@ STAGE PLANS: keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 
(type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -449,15 +449,15 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -476,19 +476,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File 
File Sink Vectorization: className: VectorFileSinkOperator native: false
- Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 1541908..d1ef666 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -84,25 +84,25 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean)
- Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
- Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Map Vectorization:
@@ -125,15 +125,15 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++
- Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized
@@ -148,10 +148,10 @@ STAGE PLANS:
Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
File Output Operator compressed: false
- Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index 1d925c5..8d96d0d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -80,25 +80,25 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc -
Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) outputColumnNames: cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized Map Vectorization: @@ -121,15 +121,15 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: 
decimal(13,2)), _col12 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: vectorized
@@ -144,10 +144,10 @@ STAGE PLANS:
Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
File Output Operator compressed: false
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index e731c2d..d7fade1 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -57,25 +57,25 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) outputColumnNames: cdouble, cstring1, ctimestamp1
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Group By Operator aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) sort order: +++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized Map Vectorization:
@@ -98,14 +98,14 @@ STAGE PLANS:
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
File Output Operator compressed: false
- Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
index a8f401b..c5060eb 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
@@ -65,18 +65,18 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean)
- Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++
- Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization:
@@ -100,10 +100,10 @@ STAGE PLANS:
Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
File Output Operator compressed: false
- Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index e731c2d..d7fade1 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -57,25 +57,25 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) outputColumnNames: cdouble, cstring1, ctimestamp1
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Group By Operator aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) sort order: +++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized Map Vectorization:
@@ -98,14 +98,14 @@ STAGE PLANS:
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
File Output Operator compressed: false
- Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
index 923ff5e..c6a3abb 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -33,19 +33,19 @@ STAGE PLANS:
native: true projectedOutputColumns: [12] selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) ->
12:double - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -201,7 +201,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -211,7 +211,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean) - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21)) outputColumnNames: _col0, _col1, _col2 @@ -220,7 +220,7 @@ STAGE PLANS: native: true projectedOutputColumns: [12, 15, 17] selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21) - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: double) sort order: ++ @@ -229,7 +229,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(22,21)) Execution mode: vectorized @@ -258,19 +258,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 1365 Data size: 41904 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,7 +419,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -429,7 +429,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean) - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col4, _col5 @@ -438,7 +438,7 @@ STAGE PLANS: native: true projectedOutputColumns: [12, 15, 16, 14, 17] selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: double) sort order: ++ @@ -447,7 +447,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE 
TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double) Execution mode: vectorized
@@ -476,19 +476,19 @@ STAGE PLANS:
className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 1, 3, 4]
- Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Limit Number of rows: 100 Limit Vectorization: className: VectorLimitOperator native: true
- Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false
- Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
index 1f1bb30..4e0916d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
@@ -23,14 +23,14 @@ STAGE PLANS:
TableScan alias: alltypesorc filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint
- Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
Group By Operator aggregations: avg(cbigint) mode: hash
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 98c0505..1859c71 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
Map Operator Tree: TableScan alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -101,7 +101,7 @@ STAGE PLANS:
native: true predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) ->
12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint @@ -109,7 +109,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 5, 1, 4, 0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) Group By Vectorization: @@ -329,7 +329,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -339,7 +339,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) - Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), 
cdouble (type: double), ctinyint (type: tinyint) outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint @@ -347,7 +347,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3, 1, 5, 0] - Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) Group By Vectorization: @@ -559,7 +559,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -569,7 +569,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double) outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble @@ -577,7 +577,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3, 0, 1, 2, 5] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) Group By Vectorization: @@ -768,7 +768,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -778,7 +778,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 
0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean) - Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float) outputColumnNames: ctinyint, cbigint, cint, cfloat @@ -786,7 +786,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 3, 2, 4] - Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) Group By Vectorization: @@ -985,7 +985,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -995,7 +995,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: 
decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 @@ -1004,7 +1004,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30] selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(9,7) - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7)) sort order: +++++++++++++++++++++++ @@ -1013,7 +1013,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution 
mode: vectorized Map Vectorization: @@ -1041,19 +1041,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1282,7 +1282,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1292,7 +1292,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, 
_col18, _col19, _col20, _col21, _col22, _col23, _col24 @@ -1301,7 +1301,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28] selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) sort order: +++++++++++++++++++++++++ @@ -1310,7 +1310,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -1338,19 +1338,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Limit Number 
of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1528,7 +1528,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1538,7 +1538,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean) - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, 
_col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 @@ -1547,7 +1547,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28] selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(8,7), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) sort order: +++++++++++++++++++++++ @@ -1556,7 +1556,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized @@ -1585,19 +1585,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 75 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 75 Data size: 2250 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1832,7 +1832,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1842,7 +1842,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean) - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -1851,7 +1851,7 @@ STAGE PLANS: native: true projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24] selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double - Statistics: Num rows: 3868 Data size: 118746 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) sort order: +++++++++++++++ @@ -1860,7 +1860,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized @@ -1889,19 +1889,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] - Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 45 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2078,7 +2078,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -2088,7 +2088,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint) outputColumnNames: 
csmallint, cbigint, ctinyint @@ -2096,7 +2096,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 3, 0] - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() Group By Vectorization: @@ -2110,12 +2110,12 @@ STAGE PLANS: keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -2138,15 +2138,15 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) sort order: +++++++++++ - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -2165,19 +2165,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2330,7 +2330,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -2340,7 +2340,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean) - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cfloat (type: float) outputColumnNames: cdouble, cfloat @@ -2348,7 +2348,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [5, 4] - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) Group By Vectorization: @@ -2362,12 +2362,12 @@ STAGE PLANS: keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -2390,15 +2390,15 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized @@ -2417,13 +2417,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2626,7 +2626,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -2636,7 +2636,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, 
FilterDoubleColEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble @@ -2644,7 +2644,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) Group By Vectorization: @@ -2658,12 +2658,12 @@ STAGE PLANS: keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -2686,15 +2686,15 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), 
(- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) sort order: +++++++++++++++++++++++++++++++++++++++ - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -2713,19 +2713,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38] - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE 
Column stats: NONE Limit Number of rows: 50 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3003,7 +3003,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -3013,7 +3013,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint @@ -3021,7 +3021,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) Group By Vectorization: @@ -3035,12 +3035,12 @@ STAGE PLANS: keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 10239 Data size: 314333 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Map Vectorization: @@ -3063,15 +3063,15 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: float), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (- _col1) (type: float), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 Execution mode: vectorized @@ -3090,13 +3090,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 
24] - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 67ed3dd..b99af0f 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -61,7 +61,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -70,13 +70,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 14, 15] selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -192,7 +192,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 
2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -202,7 +202,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -211,13 +211,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 14, 15] selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 030a71b..c69fcab 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -34,7 +34,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -42,7 +42,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE 
Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -72,7 +72,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -82,7 +82,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -90,7 +90,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -104,7 +104,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -113,7 +113,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 2, 12] selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out index 90ef576..3780a4a 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out @@ -119,7 +119,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -129,7 +129,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) 
(type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 @@ -138,13 +138,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:string, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:string, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 
50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 996021f..6760e51 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -20,14 +20,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: tinyint) @@ -47,14 +47,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v3 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: smallint) @@ -81,14 +81,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -98,11 +98,11 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: smallint), _col3 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -112,7 +112,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) mode: hash diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 3b8b636..9140373 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -37,7 +37,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -45,7 +45,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -55,7 +55,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -69,7 +69,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -79,7 +79,7 @@ STAGE PLANS: native: true predicateExpression: 
SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -87,7 +87,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -97,7 +97,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -121,11 +121,11 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) mode: hash diff --git a/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out index 26aab1c..b88eda5 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out @@ -59,17 +59,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1024 Data 
size: 31436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out b/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out index 8cb04c7..cb53f86 100644 --- a/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out +++ b/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out @@ -29,10 +29,12 @@ POSTHOOK: Output: default@s/c PREHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value PREHOOK: type: QUERY PREHOOK: Input: default@s/c +PREHOOK: Output: default@s/c #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@s/c +POSTHOOK: Output: default@s/c #### A masked pattern was here #### PREHOOK: query: SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/stats_invalidation.q.out b/ql/src/test/results/clientpositive/stats_invalidation.q.out index a0e7663..a98d98e 100644 --- a/ql/src/test/results/clientpositive/stats_invalidation.q.out +++ b/ql/src/test/results/clientpositive/stats_invalidation.q.out @@ -21,10 +21,12 @@ POSTHOOK: Lineage: stats_invalid.value SIMPLE [(src)src.FieldSchema(name:value, PREHOOK: query: analyze table stats_invalid compute statistics for columns key,value PREHOOK: type: QUERY PREHOOK: Input: default@stats_invalid +PREHOOK: Output: default@stats_invalid #### A masked pattern was here #### POSTHOOK: query: analyze table stats_invalid compute statistics for columns key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_invalid +POSTHOOK: Output: default@stats_invalid #### A masked pattern was here #### PREHOOK: query: desc formatted stats_invalid PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/stats_missing_warning.q.out b/ql/src/test/results/clientpositive/stats_missing_warning.q.out index 0ed70a0..b905785 100644 --- a/ql/src/test/results/clientpositive/stats_missing_warning.q.out +++ b/ql/src/test/results/clientpositive/stats_missing_warning.q.out @@ -117,26 +117,32 @@ POSTHOOK: Input: default@missing_stats_t3 PREHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t1 +PREHOOK: Output: default@missing_stats_t1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t1 +POSTHOOK: Output: default@missing_stats_t1 #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t2 +PREHOOK: Output: default@missing_stats_t2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t2 +POSTHOOK: Output: default@missing_stats_t2 #### A masked pattern was here #### 
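Note: the PREHOOK/POSTHOOK changes in these golden files all reflect the same behavioral change — the analyzed table (and, for partitioned tables, each analyzed partition) is now recorded as an output of the column-stats query, not just an input, because basic table stats are updated alongside the column stats. A minimal sketch of a statement that now produces matching Output lines; the table `t` and its columns here are hypothetical, for illustration only:

    -- Hypothetical table, used only to illustrate the changed hook output.
    CREATE TABLE t (key STRING, value STRING);

    -- Column-stats collection now also updates basic table stats, so the
    -- analyzed table appears as both an Input and an Output entity in the
    -- PREHOOK/POSTHOOK listings.
    ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS key, value;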
 PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@missing_stats_t3
+PREHOOK: Output: default@missing_stats_t3
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@missing_stats_t3
+POSTHOOK: Output: default@missing_stats_t3
 #### A masked pattern was here ####
 PREHOOK: query: SELECT COUNT(*) FROM missing_stats_t1 t1
diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out
index 88c2114..97947ba 100644
--- a/ql/src/test/results/clientpositive/stats_only_null.q.out
+++ b/ql/src/test/results/clientpositive/stats_only_null.q.out
@@ -169,10 +169,12 @@ STAGE PLANS:
 PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_null
+PREHOOK: Output: default@stats_null
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_null
+POSTHOOK: Output: default@stats_null
 #### A masked pattern was here ####
 PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d
 PREHOOK: type: QUERY
@@ -364,12 +366,18 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@stats_null_part
 PREHOOK: Input: default@stats_null_part@dt=1
 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+PREHOOK: Output: default@stats_null_part
+PREHOOK: Output: default@stats_null_part@dt=1
+PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table stats_null_part compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stats_null_part
 POSTHOOK: Input: default@stats_null_part@dt=1
 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Output: default@stats_null_part
+POSTHOOK: Output: default@stats_null_part@dt=1
+POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__
 #### A masked pattern was here ####
 PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a
 PREHOOK: type: DESCTABLE
diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out
index c779741..f6d5181 100644
--- a/ql/src/test/results/clientpositive/stats_partial_size.q.out
+++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out
@@ -28,10 +28,12 @@ POSTHOOK: Output: default@sample
 PREHOOK: query: analyze table sample compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@sample
+PREHOOK: Output: default@sample
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table sample compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@sample
+POSTHOOK: Output: default@sample
 #### A masked pattern was here ####
 PREHOOK: query: explain select sample_partitioned.x from sample_partitioned, sample where sample.y = sample_partitioned.y
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/stats_ppr_all.q.out b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
index d4060c6..c52f582 100644
--- a/ql/src/test/results/clientpositive/stats_ppr_all.q.out
+++ b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
@@ -46,6 +46,10 @@ PREHOOK: Input: default@ss
 PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=1
 PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=2
 PREHOOK: Input: default@ss@country=US/year=2015/month=2/day=1
+PREHOOK: Output: default@ss
+PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=1
+PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=2
+PREHOOK: Output: default@ss@country=US/year=2015/month=2/day=1
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns
 POSTHOOK: type: QUERY
@@ -53,6 +57,10 @@ POSTHOOK: Input: default@ss
 POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=1
 POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=2
 POSTHOOK: Input: default@ss@country=US/year=2015/month=2/day=1
+POSTHOOK: Output: default@ss
+POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=1
+POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=2
+POSTHOOK: Output: default@ss@country=US/year=2015/month=2/day=1
 #### A masked pattern was here ####
 PREHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index c7ee93d..02d49b7 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -66,7 +66,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP,
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-0, Stage-2
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -109,6 +110,9 @@ STAGE PLANS:
       Column Types: string, int, float
       Table: default.uservisits_web_text_none
 
+  Stage: Stage-2
+    Stats-Aggr Operator
+
 PREHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
 PREHOOK: type: QUERY
@@ -117,7 +121,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP,
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-0, Stage-2
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -126,7 +131,8 @@ STAGE PLANS:
         TableScan
           alias: uservisits_web_text_none
           Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE
-          GatherStats: false
+          Statistics Aggregation Key Prefix: default.uservisits_web_text_none/
+          GatherStats: true
           Select Operator
             expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float)
             outputColumnNames: sourceip, avgtimeonsite, adrevenue
@@ -228,13 +234,19 @@ STAGE PLANS:
       Table: default.uservisits_web_text_none
       Is Table Level Stats: true
 
+  Stage: Stage-2
+    Stats-Aggr Operator
+      Stats Aggregation Key Prefix: default.uservisits_web_text_none/
+
 PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
 PREHOOK: type: QUERY
 PREHOOK: Input: default@uservisits_web_text_none
+PREHOOK: Output: default@uservisits_web_text_none
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: Output: default@uservisits_web_text_none
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP
 PREHOOK: type: DESCTABLE
@@ -300,7 +312,8 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 depends on stages: Stage-0
+  Stage-1 depends on stages: Stage-0, Stage-2
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -343,13 +356,18 @@ STAGE PLANS:
       Column Types: int, double, string, boolean, binary
       Table: default.empty_tab
 
+  Stage: Stage-2
+    Stats-Aggr Operator
+
 PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
 PREHOOK: type: QUERY
 PREHOOK: Input: default@empty_tab
+PREHOOK: Output: default@empty_tab
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@empty_tab
+POSTHOOK: Output: default@empty_tab
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted empty_tab a
 PREHOOK: type: DESCTABLE
@@ -469,10 +487,12 @@ PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for co
 PREHOOK: type: QUERY
 PREHOOK: Input: test@uservisits_web_text_none
 #### A masked pattern was here ####
+PREHOOK: Output: test@uservisits_web_text_none
 POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword
 POSTHOOK: type: QUERY
 POSTHOOK: Input: test@uservisits_web_text_none
 #### A masked pattern was here ####
+POSTHOOK: Output: test@uservisits_web_text_none
 PREHOOK: query: desc extended UserVisits_web_text_none sKeyword
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: test@uservisits_web_text_none
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index 20c330a..19a9895 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -234,10 +234,12 @@ Stage-2
 PREHOOK: query: analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_stats
+PREHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_stats compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_stats
+POSTHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
@@ -259,6 +261,9 @@ Stage-2
           Output:["key","value"]
           TableScan [TS_0] (rows=500/500 width=10)
             default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+Stage-3
+  Stats-Aggr Operator
+    Please refer to the previous Stage-0
 
 PREHOOK: query: CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))
 PREHOOK: type: CREATEMACRO
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index ee9affb..d5fa74b 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -36,10 +36,12 @@ Stage-2
 PREHOOK: query: analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_stats
+PREHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_stats compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_stats
+POSTHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
@@ -61,6 +63,9 @@ Stage-2
           Output:["key","value"]
           TableScan [TS_0] (rows=500/500 width=10)
             default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+Stage-3
+  Stats-Aggr Operator
+    Please refer to the previous Stage-0
 
 PREHOOK: query: drop table src_multi2
 PREHOOK: type: DROPTABLE
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index 74e4693..ddda3ee 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -19,10 +19,12 @@ POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchem
 PREHOOK: query: analyze table acid_vectorized compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_vectorized
+PREHOOK: Output: default@acid_vectorized
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table acid_vectorized compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_vectorized
+POSTHOOK: Output: default@acid_vectorized
 #### A masked pattern was here ####
 PREHOOK: query: explain select a, b from acid_vectorized order by a, b
 PREHOOK: type: QUERY
@@ -39,13 +41,13 @@ Stage-0
   Stage-1
     Reducer 2 vectorized
       File Output Operator [FS_8]
-        Select Operator [SEL_7] (rows=16 width=101)
+        Select Operator [SEL_7] (rows=10 width=101)
           Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] vectorized
           SHUFFLE [RS_6]
-            Select Operator [SEL_5] (rows=16 width=101)
+            Select Operator [SEL_5] (rows=10 width=101)
               Output:["_col0","_col1"]
-              TableScan [TS_0] (rows=16 width=101)
+              TableScan [TS_0] (rows=10 width=101)
                 default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
 
 PREHOOK: query: explain select key, value
@@ -215,6 +217,9 @@ Stage-2
           Output:["key","value"]
           TableScan [TS_0] (rows=500 width=178)
             default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+Stage-3
+  Stats-Aggr Operator
+    Please refer to the previous Stage-0
 
 PREHOOK: query: explain CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))
diff --git a/ql/src/test/results/clientpositive/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/vector_outer_join1.q.out
index 565ddac..c4e89b4 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join1.q.out
@@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/vector_outer_join2.q.out
index 4f63113..88183b9 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join2.q.out
@@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
index 53377db..77a571e 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
index 324b67e..194a534 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
@@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_b
+PREHOOK: Output: default@small_alltypesorc_b
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_b
+POSTHOOK: Output: default@small_alltypesorc_b
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_b
 PREHOOK: type: QUERY