diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 3e749eb..10b5522 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStore;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
@@ -1007,6 +1008,23 @@ public TableSpec(Hive db, HiveConf conf, String tableName, Map<String, String> p
       }
     }
 
+    public TableSpec(Hive db, HiveConf conf, Table tab) throws HiveException {
+      this.tableName = tab.getTableName();
+      this.tableHandle = tab;
+      if (this.tableHandle.isPartitioned()) {
+        this.specType = SpecType.STATIC_PARTITION;
+        this.partitions = new ArrayList<>();
+        this.partitions.addAll(db.getAllPartitionsOf(this.tableHandle));
+        List<FieldSchema> partCols = this.tableHandle.getPartCols();
+        this.partSpec = new LinkedHashMap<>();
+        for (FieldSchema partCol : partCols) {
+          partSpec.put(partCol.getName(), null);
+        }
+      } else {
+        this.specType = SpecType.TABLE_ONLY;
+      }
+    }
+
     public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
         boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
@@ -1155,6 +1173,7 @@ public String toString() {
     private List<String> colName;
     private List<String> colType;
     private boolean tblLvl;
+    private boolean isCollectTableStats;
 
     public String getTableName() {
@@ -1188,6 +1207,15 @@ public void setTblLvl(boolean isTblLvl) {
     public void setColType(List<String> colType) {
       this.colType = colType;
     }
+
+    public boolean isCollectTableStats() {
+      return isCollectTableStats;
+    }
+
+    public void setCollectTableStats(boolean isCollectTableStats) {
+      this.isCollectTableStats = isCollectTableStats;
+    }
+
   }
 
 /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index 93b8183..f45eca9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -68,6 +68,8 @@ private List<String> colNames;
   private List<String> colType;
   private Table tbl;
 
+  // Table stats can be collected only by a whole-table scan; a concrete partition spec may restrict the scan to a subset of partitions.
+  private boolean isCollectTableStats = true;
 
   public ColumnStatsSemanticAnalyzer(QueryState queryState) throws SemanticException {
     super(queryState);
@@ -157,6 +159,7 @@ private StringBuilder genPartitionClause(Map<String, String> partSpec) throws Sem
     for (String partKey : partSpec.keySet()) {
       String value;
       if ((value = partSpec.get(partKey)) != null) {
+        isCollectTableStats = false;
         if (!predPresent) {
           predPresent = true;
         } else {
@@ -403,6 +406,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
     analyzeRewrite.setTblLvl(isTableLevel);
     analyzeRewrite.setColName(colNames);
     analyzeRewrite.setColType(colType);
+    analyzeRewrite.setCollectTableStats(isCollectTableStats);
     qbp.setAnalyzeRewrite(analyzeRewrite);
     initCtx(ctx);
     ctx.setExplainConfig(origCtx.getExplainConfig());
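Concretely, genPartitionClause() above clears the flag as soon as it sees a concrete partition value, so table-level stats survive only for ANALYZE statements that either name no partitions or use a fully dynamic spec (every value null). Below is a standalone sketch of that decision; the class and method names are illustrative, not Hive APIs, and plain collections stand in for the parser types.

import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative only: mirrors the isCollectTableStats logic added above.
public class CollectTableStatsSketch {

  static boolean canCollectTableStats(Map<String, String> partSpec) {
    if (partSpec == null) {
      return true;  // no PARTITION clause: the scan covers the whole table
    }
    for (String value : partSpec.values()) {
      if (value != null) {
        return false; // a concrete value pins the scan to some partitions only
      }
    }
    return true;      // all values null: dynamic spec, still the whole table
  }

  public static void main(String[] args) {
    Map<String, String> dynamic = new LinkedHashMap<>();
    dynamic.put("ds", null);                    // ... PARTITION (ds) ...
    Map<String, String> pinned = new LinkedHashMap<>();
    pinned.put("ds", "2008-04-08");             // ... PARTITION (ds='2008-04-08') ...

    System.out.println(canCollectTableStats(null));    // true
    System.out.println(canCollectTableStats(dynamic)); // true
    System.out.println(canCollectTableStats(pinned));  // false
  }
}

With the tables from the new test below, "analyze table spart compute statistics for columns" keeps the flag set, while an analyze restricted to a single (ds, hr) partition would clear it.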
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2430811..db8a425 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10246,9 +10246,28 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       RowResolver rwsch)
       throws SemanticException {
-    if (!qbp.isAnalyzeCommand()) {
+    // When only column stats are being computed, we still gather basic stats
+    // from the table scan.
+    if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) {
       tsDesc.setGatherStats(false);
     } else {
+      String tblName = tab.getTableName();
+      TableSpec tblSpec = qbp.getTableSpec(alias);
+      if (qbp.getAnalyzeRewrite() != null) {
+        // This branch computes column stats.
+        if (tblSpec == null && qbp.getAnalyzeRewrite().isCollectTableStats()) {
+          try {
+            tblSpec = new TableSpec(db, conf, tab);
+            tab.setTableSpec(tblSpec);
+          } catch (HiveException e) {
+            throw new SemanticException(e);
+          }
+        } else {
+          LOG.debug("Skipping table stats collection: the partition specification may not cover all partitions.");
+          tsDesc.setGatherStats(false);
+          return;
+        }
+      }
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
         String statsTmpLoc = ctx.getTempDirForPath(tab.getPath()).toString();
         LOG.debug("Set stats collection dir : " + statsTmpLoc);
@@ -10262,14 +10281,12 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
       while (vcs.hasNext()) {
         VirtualColumn vc = vcs.next();
-        rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(),
-            vc.getTypeInfo(), alias, true, vc.getIsHidden()));
+        rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true,
+            vc.getIsHidden()));
         vcList.add(vc);
       }
       tsDesc.addVirtualCols(vcList);
 
-      String tblName = tab.getTableName();
-      TableSpec tblSpec = qbp.getTableSpec(alias);
       Map<String, String> partSpec = tblSpec.getPartSpec();
 
       if (partSpec != null) {
@@ -10279,13 +10296,14 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       }
 
       // Theoretically the key prefix could be any unique string shared
-      // between TableScanOperator (when publishing) and StatsTask (when aggregating).
+      // between TableScanOperator (when publishing) and StatsTask (when
+      // aggregating).
       // Here we use
       // db_name.table_name + partitionSec
       // as the prefix for easy of read during explain and debugging.
       // Currently, partition spec can only be static partition.
       String k = MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
-      tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k);
+      tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
 
       // set up WriteEntity for replication
       outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
@@ -10295,11 +10313,11 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       if (partSpec == null) {
         throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
       }
-      List<Partition> partitions = qbp.getTableSpec().partitions;
+      List<Partition> partitions = tblSpec.partitions;
       if (partitions != null) {
         for (Partition partn : partitions) {
           // inputs.add(new ReadEntity(partn)); // is this needed at all?
-          LOG.info("XXX: adding part: "+partn);
+          LOG.info("XXX: adding part: " + partn);
           outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
         }
       }
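When the rewrite allows table stats, setupStats() above builds a whole-table spec via the new TableSpec(Hive, HiveConf, Table) constructor: every partition becomes a stats target and the partition spec maps each key to null, i.e. constrains nothing. A toy sketch of the resulting shape, using the spart table from the new test further down (plain collections, not Hive types):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative only: the shape of the whole-table spec built by the new
// TableSpec(Hive, HiveConf, Table) constructor for a partitioned table.
public class WholeTableSpecSketch {
  public static void main(String[] args) {
    // Stand-ins for tab.getPartCols() and db.getAllPartitionsOf(tab).
    List<String> partCols = Arrays.asList("ds", "hr");
    List<String> partitions = new ArrayList<>(
        Arrays.asList("ds=2008-04-08/hr=11", "ds=2008-04-08/hr=12"));

    // Every partition key maps to null, so the spec constrains nothing and
    // the gathered stats describe the whole table.
    Map<String, String> partSpec = new LinkedHashMap<>();
    for (String partCol : partCols) {
      partSpec.put(partCol, null);
    }

    System.out.println(partSpec);    // {ds=null, hr=null}
    System.out.println(partitions);  // both partitions are stats targets
  }
}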
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 5f9ccc8..b55d8a6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -51,11 +52,15 @@ import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
@@ -66,7 +71,10 @@ import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;
+import org.apache.hadoop.hive.ql.plan.StatsWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -76,6 +84,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter;
 import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
+import org.apache.hadoop.mapred.InputFormat;
 
 import com.google.common.collect.Interner;
 import com.google.common.collect.Interners;
@@ -300,6 +309,19 @@ public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>>
       getLeafTasks(rootTasks, leafTasks);
       if (isCStats) {
         genColumnStatsTask(pCtx.getAnalyzeRewrite(), null, leafTasks, outputs, rootTasks.size());
+        // Also set up a task to persist basic table stats; see genTableStats() below.
+        if (pCtx.getAnalyzeRewrite().isCollectTableStats()) {
+          Task<? extends Serializable> source = rootTasks.iterator().next();
+          source.getMapWork().iterator().next().setGatheringStats(true);
+          TableScanOperator tableScanOperator = (TableScanOperator) source.getMapWork().iterator()
+              .next().getAllRootOperators().iterator().next();
+          Task<? extends Serializable> statsTask = genTableStats(tableScanOperator, source, pCtx);
+          // Make ColumnStatsTask depend on the stats task, because the stats
+          // task does not read HMS (it reads the table spec instead) when it
+          // writes stats. See StatsTask for more details.
+          statsTask.addDependentTask(source.getChildTasks().iterator().next());
+        }
       } else {
         for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx
             .getColumnStatsAutoGatherContexts()) {
@@ -364,6 +386,47 @@ public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>>
 
+  private Task<? extends Serializable> genTableStats(TableScanOperator tableScan,
+      Task<? extends Serializable> sourceTask, ParseContext parseContext) {
+    // Note that we have to scan the table to compute column stats anyway, so
+    // the noscan/partialscan variants can be ignored here.
+    Task<? extends Serializable> statsTask = null;
+    Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata()
+        .getInputFormatClass();
+    if (inputFormat.equals(OrcInputFormat.class)) {
+      // For ORC, all the following statements are the same:
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+
+      StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata()
+          .getTableSpec());
+      tableScan.getConf().setGatherStats(false);
+      snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      // If a partition is specified, get the pruned partition list.
+      Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
+      if (confirmedParts.size() > 0) {
+        Table source = tableScan.getConf().getTableMetadata();
+        List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
+        PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols,
+            false);
+        snjWork.setPrunedPartitionList(partList);
+      }
+      statsTask = TaskFactory.get(snjWork, parseContext.getConf());
+    } else {
+      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
+      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+      statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
+      statsWork.setSourceTask(sourceTask);
+      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      statsTask = TaskFactory.get(statsWork, parseContext.getConf());
+    }
+    sourceTask.addDependentTask(statsTask);
+    return statsTask;
+  }
+
   private void patchUpAfterCTASorMaterializedView(final List<Task<? extends Serializable>> rootTasks,
       final HashSet<WriteEntity> outputs, Task<? extends Serializable> createTask) {
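The net effect of the compile() change is a three-task graph: the scan publishes basic stats while computing column stats, a StatsTask persists the basic stats, and the ColumnStatsTask runs last. A toy sketch of the wiring (the Task class below is a stand-in, not Hive's), matching the stage dependencies in the expected explain output further down:

import java.util.ArrayList;
import java.util.List;

// Illustrative only: the task-graph shape produced by the compile() change above.
public class StatsTaskWiringSketch {

  static class Task {
    final String name;
    final List<Task> childTasks = new ArrayList<>();
    Task(String name) { this.name = name; }
    void addDependentTask(Task child) { childTasks.add(child); }
    List<Task> getChildTasks() { return childTasks; }
  }

  public static void main(String[] args) {
    Task scan = new Task("Stage-0: scan (computes column stats, publishes basic stats)");
    Task columnStats = new Task("Stage-1: ColumnStatsTask");
    scan.addDependentTask(columnStats);   // edge that existed before the patch

    // genTableStats(): persist basic stats once the scan finishes.
    Task basicStats = new Task("Stage-2: StatsTask");
    scan.addDependentTask(basicStats);

    // ColumnStatsTask must also wait for StatsTask, which writes through the
    // table spec without re-reading HMS (see the comment in the patch).
    basicStats.addDependentTask(scan.getChildTasks().iterator().next());

    // Resulting stage dependencies, as in the expected explain output below:
    //   Stage-0 is a root stage
    //   Stage-1 depends on stages: Stage-0, Stage-2
    //   Stage-2 depends on stages: Stage-0
    for (Task child : scan.getChildTasks()) {
      System.out.println(scan.name + " -> " + child.name);
    }
    for (Task child : basicStats.getChildTasks()) {
      System.out.println(basicStats.name + " -> " + child.name);
    }
  }
}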
diff --git a/ql/src/test/queries/clientpositive/column_table_stats.q b/ql/src/test/queries/clientpositive/column_table_stats.q
new file mode 100644
index 0000000..5f3da9d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats.q
@@ -0,0 +1,40 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
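The new test uses TEXTFILE tables, so genTableStats() above takes the StatsWork branch and aggregates what the scan published under the aggregation key; an ORC table would instead get a StatsNoJobWork that reads the stats straight from file footers, optionally restricted to the confirmed (pruned) partitions. A minimal sketch of that dispatch, with empty stand-ins for the two Work types (illustrative, not Hive classes):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Illustrative only: the input-format dispatch inside genTableStats() above.
public class GenTableStatsDispatchSketch {

  interface Work {}

  // ORC footers already carry row counts and sizes, so no extra job is needed;
  // the footer reads can be restricted to the confirmed (pruned) partitions.
  static class StatsNoJobWork implements Work {
    List<String> prunedPartitions = Collections.emptyList();
  }

  // Any other format: aggregate what the scan published under the agg key.
  static class StatsWork implements Work {
    String aggKey;
  }

  static Work chooseWork(boolean isOrcInput, String aggPrefix, List<String> confirmedParts) {
    if (isOrcInput) {
      StatsNoJobWork w = new StatsNoJobWork();
      if (!confirmedParts.isEmpty()) {
        w.prunedPartitions = confirmedParts;
      }
      return w;
    }
    StatsWork w = new StatsWork();
    w.aggKey = aggPrefix; // e.g. "default.s/", as in the explain output below
    return w;
  }

  public static void main(String[] args) {
    // TEXTFILE table, as in column_table_stats.q: StatsWork path.
    System.out.println(chooseWork(false, "default.s/",
        Collections.<String>emptyList()).getClass().getSimpleName());
    // ORC table with a confirmed partition: StatsNoJobWork path.
    System.out.println(chooseWork(true, null,
        Arrays.asList("ds=2008-04-08/hr=11")).getClass().getSimpleName());
  }
}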
diff --git a/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
index ceacc24..09dacae 100644
--- a/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
+++ b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
@@ -1,5 +1,7 @@
 set hive.exec.parallel=true;
 
-explain analyze table src compute statistics for columns;
+create table s like src;
 
-analyze table src compute statistics for columns;
\ No newline at end of file
+explain analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index a3c4f1a..c9098ce 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -35,10 +35,12 @@ POSTHOOK: Output: default@src_stat_int PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat +PREHOOK: Output: default@src_stat #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat +POSTHOOK: Output: default@src_stat #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat key PREHOOK: type: DESCTABLE @@ -78,10 +80,12 @@ value string PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_int +PREHOOK: Output: default@src_stat_int #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_int +POSTHOOK: Output: default@src_stat_int #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_int key PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/analyze_tbl_date.q.out b/ql/src/test/results/clientpositive/analyze_tbl_date.q.out index a0cdbca..eac13cc 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_date.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_date.q.out @@ -16,10 +16,12 @@ POSTHOOK: Lineage: test_table.d EXPRESSION [(values__tmp__table__1)values__tmp__ PREHOOK: query: analyze table test_table compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@test_table +PREHOOK: Output: default@test_table #### A masked pattern was here #### POSTHOOK: query: analyze table test_table compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table +POSTHOOK: Output: default@test_table #### A masked pattern was here #### PREHOOK: query: describe formatted test_table PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index ed90b6f..cae6973 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -35,12 +35,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part PREHOOK: Input: default@src_stat_part@partitionid=1 PREHOOK: Input: default@src_stat_part@partitionid=2 +PREHOOK: Output: default@src_stat_part +PREHOOK: Output:
default@src_stat_part@partitionid=1 +PREHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part POSTHOOK: Input: default@src_stat_part@partitionid=1 POSTHOOK: Input: default@src_stat_part@partitionid=2 +POSTHOOK: Output: default@src_stat_part +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE @@ -56,12 +62,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part PREHOOK: Input: default@src_stat_part@partitionid=1 PREHOOK: Input: default@src_stat_part@partitionid=2 +PREHOOK: Output: default@src_stat_part +PREHOOK: Output: default@src_stat_part@partitionid=1 +PREHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part POSTHOOK: Input: default@src_stat_part@partitionid=1 POSTHOOK: Input: default@src_stat_part@partitionid=2 +POSTHOOK: Output: default@src_stat_part +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index 32644dc..1b9efc2 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -59,10 +59,12 @@ POSTHOOK: Output: default@over1k PREHOOK: query: analyze table over1k compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1k #### A masked pattern was here #### POSTHOOK: query: analyze table over1k compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1k #### A masked pattern was here #### PREHOOK: query: explain select count(*) from over1k where ( (t=1 and si=2) diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index e22c3ef..aeb9719 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -112,10 +112,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc where state='OH' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854..95017f8 100644 --- 
a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -76,10 +76,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c @@ -177,10 +179,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select year from loc_orc group by year PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed1..d95af92 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -29,10 +29,12 @@ POSTHOOK: Output: default@location PREHOOK: query: analyze table location compute statistics for columns state, country PREHOOK: type: QUERY PREHOOK: Input: default@location +PREHOOK: Output: default@location #### A masked pattern was here #### POSTHOOK: query: analyze table location compute statistics for columns state, country POSTHOOK: type: QUERY POSTHOOK: Input: default@location +POSTHOOK: Output: default@location #### A masked pattern was here #### PREHOOK: query: explain select state, country from location group by state, country PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index 5d4fe6c..685088d 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -97,26 +97,32 @@ POSTHOOK: Output: default@loc PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid PREHOOK: type: QUERY PREHOOK: Input: default@emp +PREHOOK: Output: default@emp #### A masked pattern was here #### POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp #### A masked pattern was here #### PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@dept +PREHOOK: Output: default@dept #### A masked pattern was here #### POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept #### A masked pattern was here #### PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc +PREHOOK: Output: default@loc #### A masked pattern was here #### POSTHOOK: query: analyze table loc compute statistics for columns 
state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc #### A masked pattern was here #### PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index b4d46d2..e74554d 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -297,10 +297,12 @@ POSTHOOK: Output: default@store PREHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space PREHOOK: type: QUERY PREHOOK: Input: default@store +PREHOOK: Output: default@store #### A masked pattern was here #### POSTHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space POSTHOOK: type: QUERY POSTHOOK: Input: default@store +POSTHOOK: Output: default@store #### A masked pattern was here #### PREHOOK: query: analyze table store_bigint compute statistics PREHOOK: type: QUERY @@ -313,10 +315,12 @@ POSTHOOK: Output: default@store_bigint PREHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space PREHOOK: type: QUERY PREHOOK: Input: default@store_bigint +PREHOOK: Output: default@store_bigint #### A masked pattern was here #### POSTHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space POSTHOOK: type: QUERY POSTHOOK: Input: default@store_bigint +POSTHOOK: Output: default@store_bigint #### A masked pattern was here #### PREHOOK: query: analyze table store_sales compute statistics PREHOOK: type: QUERY @@ -329,10 +333,12 @@ POSTHOOK: Output: default@store_sales PREHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity PREHOOK: type: QUERY PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales #### A masked pattern was here #### POSTHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales #### A masked pattern was here #### PREHOOK: query: analyze table customer_address compute statistics PREHOOK: type: QUERY @@ -345,10 +351,12 @@ POSTHOOK: Output: default@customer_address PREHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk PREHOOK: type: QUERY PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address #### A masked pattern was here #### POSTHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk POSTHOOK: type: QUERY POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address #### A masked pattern was here #### PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out index ea181cb..5139db4 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year PREHOOK: type: 
QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index 873f1ab..af83385 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -113,10 +113,12 @@ STAGE PLANS: PREHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 PREHOOK: type: QUERY PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc #### A masked pattern was here #### POSTHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc #### A masked pattern was here #### PREHOOK: query: explain select * from alltypes_orc PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out index efc3c1f..d8c0dfd 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -120,10 +120,12 @@ STAGE PLANS: PREHOOK: query: analyze table emp_orc compute statistics for columns deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc #### A masked pattern was here #### PREHOOK: query: explain select * from emp_orc PREHOOK: type: QUERY @@ -170,10 +172,12 @@ STAGE PLANS: PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc #### A masked pattern was here #### PREHOOK: query: explain select * from emp_orc PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/annotate_stats_union.q.out b/ql/src/test/results/clientpositive/annotate_stats_union.q.out index 059f261..3b4b169 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select state from loc_orc PREHOOK: type: QUERY @@ 
-282,18 +284,22 @@ PREHOOK: query: analyze table loc_staging compute statistics for columns state,l PREHOOK: type: QUERY PREHOOK: Input: test@loc_staging #### A masked pattern was here #### +PREHOOK: Output: test@loc_staging POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_staging #### A masked pattern was here #### +POSTHOOK: Output: test@loc_staging PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: test@loc_orc #### A masked pattern was here #### +PREHOOK: Output: test@loc_orc POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_orc #### A masked pattern was here #### +POSTHOOK: Output: test@loc_orc PREHOOK: query: explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp diff --git a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out index dca158b..efdbdd6 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out @@ -13,10 +13,12 @@ POSTHOOK: Output: default@src_multi1 PREHOOK: query: analyze table src_multi1 compute statistics for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_multi1 +PREHOOK: Output: default@src_multi1 #### A masked pattern was here #### POSTHOOK: query: analyze table src_multi1 compute statistics for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1 +POSTHOOK: Output: default@src_multi1 #### A masked pattern was here #### PREHOOK: query: describe formatted src_multi1 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out index 64e65ca..11aaa15 100644 --- a/ql/src/test/results/clientpositive/avro_decimal.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal.q.out @@ -21,10 +21,12 @@ POSTHOOK: Output: default@dec PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value PREHOOK: type: QUERY PREHOOK: Input: default@dec +PREHOOK: Output: default@dec #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value POSTHOOK: type: QUERY POSTHOOK: Input: default@dec +POSTHOOK: Output: default@dec #### A masked pattern was here #### PREHOOK: query: DESC FORMATTED dec value PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out index cebc342..1fd98fa 100644 --- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out @@ -25,10 +25,12 @@ POSTHOOK: Output: default@dec PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value PREHOOK: type: QUERY PREHOOK: Input: default@dec +PREHOOK: Output: default@dec #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value POSTHOOK: type: QUERY POSTHOOK: Input: default@dec +POSTHOOK: Output: default@dec #### A masked pattern was here #### PREHOOK: query: DESC FORMATTED dec value PREHOOK: type: DESCTABLE diff --git 
a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f03..045b0a9 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -76,10 +76,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c @@ -177,10 +179,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select year from loc_orc group by year PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index fe4bc4f..96f2d96 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -49,10 +49,12 @@ POSTHOOK: Output: default@tbl1 PREHOOK: query: analyze table tbl1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 +PREHOOK: Output: default@tbl1 #### A masked pattern was here #### POSTHOOK: query: analyze table tbl1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 +POSTHOOK: Output: default@tbl1 #### A masked pattern was here #### PREHOOK: query: analyze table tbl2 compute statistics PREHOOK: type: QUERY @@ -65,10 +67,12 @@ POSTHOOK: Output: default@tbl2 PREHOOK: query: analyze table tbl2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@tbl2 #### A masked pattern was here #### POSTHOOK: query: analyze table tbl2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@tbl2 #### A masked pattern was here #### PREHOOK: query: explain select count(*) from ( diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out index f67f81b..b0a941a 100644 --- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -30,10 +30,12 @@ POSTHOOK: Lineage: all_nulls.c SIMPLE [] PREHOOK: query: analyze table all_nulls compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@all_nulls +PREHOOK: Output: default@all_nulls #### A masked pattern was here #### POSTHOOK: query: analyze table all_nulls compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@all_nulls +POSTHOOK: Output: default@all_nulls #### A masked pattern was here #### PREHOOK: query: describe formatted all_nulls a PREHOOK: type: DESCTABLE diff --git 
a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index af21343..8449f7e 100644 --- a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -56,10 +56,12 @@ Storage Desc Params: PREHOOK: query: analyze table calendar compute statistics for columns year PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns year POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/column_table_stats.q.out b/ql/src/test/results/clientpositive/column_table_stats.q.out new file mode 100644 index 0000000..f1ee4cc --- /dev/null +++ b/ql/src/test/results/clientpositive/column_table_stats.q.out @@ -0,0 +1,616 @@ +PREHOOK: query: DROP TABLE IF EXISTS s +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS s +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@s +POSTHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@s +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@s +PREHOOK: query: desc formatted s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@s +POSTHOOK: query: desc formatted s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@s +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table s compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table s compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Statistics Aggregation Key Prefix: default.s/ + GatherStats: true + Select Operator + expressions: key (type: string), value (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: s + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.s + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct s { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.s + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct s { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.s + name: default.s + Truncated Path -> Alias: + /s [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.s + Is Table Level Stats: true + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.s/ + +PREHOOK: query: analyze table s compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@s +PREHOOK: Output: default@s +#### A masked pattern was here #### 
+POSTHOOK: query: analyze table s compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s +POSTHOOK: Output: default@s +#### A masked pattern was here #### +PREHOOK: query: desc formatted s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@s +POSTHOOK: query: desc formatted s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@s +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DROP TABLE IF EXISTS spart +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS spart +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@spart +POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spart +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table spart compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table spart compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: spart + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics Aggregation Key Prefix: default.spart/ + GatherStats: true + Select Operator + expressions: ds (type: string), hr (type: string), key (type: string), value (type: string) + outputColumnNames: ds, hr, key, value + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + 
bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart + Truncated Path -> Alias: + /spart/ds=2008-04-08/hr=11 [spart] + /spart/ds=2008-04-08/hr=12 [spart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.spart + Is Table Level Stats: false + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.spart/ + +PREHOOK: query: analyze table spart compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@spart +PREHOOK: Input: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@spart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@spart +PREHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table spart compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@spart +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out 
b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 5c9f901..5b9cc37 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -529,7 +529,8 @@ analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -580,17 +581,26 @@ STAGE PLANS: Column Types: int, string Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID PREHOOK: type: DESCTABLE @@ -618,7 +628,8 @@ analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -626,11 +637,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int), employeename (type: string) outputColumnNames: employeeid, employeename - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) mode: hash @@ -661,17 +672,26 @@ STAGE PLANS: Column Types: int, string Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: 
default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part employeeID PREHOOK: type: DESCTABLE @@ -718,12 +738,19 @@ PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### +Cannot get table employee_part POSTHOOK: query: analyze table default.Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 08fb42e..948ad86 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -241,7 +241,8 @@ analyze table Employee_Part partition (employeeSalary) compute statistics for co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -292,6 +293,9 @@ STAGE PLANS: Column Types: int Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID PREHOOK: type: QUERY PREHOOK: Input: default@employee_part @@ -301,6 +305,13 @@ PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID POSTHOOK: type: QUERY @@ -311,6 +322,13 @@ POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: 
default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='3000.0', country='UK') employeeID PREHOOK: type: DESCTABLE @@ -329,7 +347,8 @@ analyze table Employee_Part partition (employeeSalary,country) compute statistic POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -337,22 +356,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeesalary (type: double), country (type: string), employeeid (type: int), employeename (type: string) outputColumnNames: employeesalary, country, employeeid, employeename - Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16) keys: employeesalary (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator @@ -360,14 +379,14 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -380,6 +399,9 @@ STAGE PLANS: Column Types: int, string Table: default.employee_part + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: 
default@employee_part @@ -389,6 +411,13 @@ PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns POSTHOOK: type: QUERY @@ -399,6 +428,13 @@ POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='3500.0', country='UK') employeeName PREHOOK: type: DESCTABLE @@ -466,6 +502,11 @@ PREHOOK: Input: default@employee@employeesalary=2000.0/country=UK PREHOOK: Input: default@employee@employeesalary=2000.0/country=USA PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=3500.0/country=UK #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary,country) compute statistics for columns POSTHOOK: type: QUERY @@ -474,6 +515,11 @@ POSTHOOK: Input: default@employee@employeesalary=2000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=2000.0/country=USA POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='3500.0', country='UK') employeeName PREHOOK: type: DESCTABLE @@ -511,6 +557,13 @@ PREHOOK: Input: 
default@employee@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary) compute statistics for columns POSTHOOK: type: QUERY @@ -521,6 +574,13 @@ POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='3000.0', country='USA') employeeName PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out index 52e3538..7654897 100644 --- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out +++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out @@ -16,7 +16,8 @@ POSTHOOK: query: explain analyze table user_web_events compute statistics for co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -59,13 +60,18 @@ STAGE PLANS: Column Types: bigint, string Table: default.user_web_events + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table user_web_events compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events #### A masked pattern was here #### POSTHOOK: query: analyze table user_web_events compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events #### A masked pattern was here #### PREHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` PREHOOK: type: QUERY @@ -73,7 +79,8 @@ POSTHOOK: query: explain analyze table user_web_events compute statistics for co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -116,11 +123,16 @@ STAGE PLANS: Column Types: bigint Table: default.user_web_events + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table user_web_events 
compute statistics for columns `user id` PREHOOK: type: QUERY PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events #### A masked pattern was here #### POSTHOOK: query: analyze table user_web_events compute statistics for columns `user id` POSTHOOK: type: QUERY POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 60652e0..965315d 100644 --- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -46,7 +46,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -89,6 +90,9 @@ STAGE PLANS: Column Types: string, int, float Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -97,7 +101,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -106,7 +111,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -212,13 +218,19 @@ STAGE PLANS: Table: default.uservisits_web_text_none Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ + PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: explain analyze table default.UserVisits_web_text_none compute statistics for columns @@ -228,7 +240,8 @@ analyze table default.UserVisits_web_text_none compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -236,11 +249,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: uservisits_web_text_none - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sourceip (type: 
string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(sourceip, 16), compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16) mode: hash @@ -271,13 +284,18 @@ STAGE PLANS: Column Types: string, string, string, float, string, string, string, string, int Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: describe formatted UserVisits_web_text_none destURL PREHOOK: type: DESCTABLE @@ -334,7 +352,8 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -377,13 +396,18 @@ STAGE PLANS: Column Types: int, double, string, boolean, binary Table: default.empty_tab + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE @@ -400,10 +424,13 @@ POSTHOOK: Input: database:dummydb PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### +Cannot get table uservisits_web_text_none POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: describe formatted default.UserVisits_web_text_none destURL PREHOOK: type: DESCTABLE @@ -464,7 +491,8 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sour POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE 
PLANS: Stage: Stage-0 @@ -507,6 +535,9 @@ STAGE PLANS: Column Types: string, int, float Table: dummydb.uservisits_in_dummy_db + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -515,7 +546,8 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sour POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -524,7 +556,8 @@ STAGE PLANS: TableScan alias: uservisits_in_dummy_db Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -630,13 +663,20 @@ STAGE PLANS: Table: dummydb.uservisits_in_dummy_db Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### +Cannot get table uservisits_in_dummy_db POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### PREHOOK: query: explain analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns @@ -646,7 +686,8 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -654,11 +695,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: uservisits_in_dummy_db - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(sourceip, 16), compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16) mode: hash @@ -689,13 +730,19 @@ STAGE PLANS: Column Types: string, string, string, float, string, string, string, string, int Table: dummydb.uservisits_in_dummy_db + Stage: Stage-2 + 
Stats-Aggr Operator + PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### +Cannot get table uservisits_in_dummy_db POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/compustat_avro.q.out b/ql/src/test/results/clientpositive/compustat_avro.q.out index 50c03bd..00c6938 100644 --- a/ql/src/test/results/clientpositive/compustat_avro.q.out +++ b/ql/src/test/results/clientpositive/compustat_avro.q.out @@ -36,10 +36,12 @@ col1 string PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3 PREHOOK: type: QUERY PREHOOK: Input: default@testavro +PREHOOK: Output: default@testavro #### A masked pattern was here #### POSTHOOK: query: analyze table testAvro compute statistics for columns col1,col3 POSTHOOK: type: QUERY POSTHOOK: Input: default@testavro +POSTHOOK: Output: default@testavro #### A masked pattern was here #### PREHOOK: query: describe formatted testAvro col1 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out index 73f08ad..407f984 100644 --- a/ql/src/test/results/clientpositive/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -52,7 +52,8 @@ analyze table tab_date compute statistics for columns fl_date POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -95,13 +96,18 @@ STAGE PLANS: Column Types: date Table: default.tab_date + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table tab_date compute statistics for columns fl_date PREHOOK: type: QUERY PREHOOK: Input: default@tab_date +PREHOOK: Output: default@tab_date #### A masked pattern was here #### POSTHOOK: query: analyze table tab_date compute statistics for columns fl_date POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_date +POSTHOOK: Output: default@tab_date #### A masked pattern was here #### PREHOOK: query: describe formatted tab_date fl_date PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/constGby.q.out b/ql/src/test/results/clientpositive/constGby.q.out index fd8ecc2..ac28f6c 100644 --- a/ql/src/test/results/clientpositive/constGby.q.out +++ b/ql/src/test/results/clientpositive/constGby.q.out @@ -17,10 +17,12 @@ POSTHOOK: Output: default@t1 PREHOOK: query: analyze table t1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: explain select count(1) from t1 group by 1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/constant_prop_3.q.out b/ql/src/test/results/clientpositive/constant_prop_3.q.out index ecf6e57..41e7bea 100644 
--- a/ql/src/test/results/clientpositive/constant_prop_3.q.out +++ b/ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -51,10 +51,12 @@ POSTHOOK: Output: default@part_hive PREHOOK: query: analyze table part_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@part_hive +PREHOOK: Output: default@part_hive #### A masked pattern was here #### POSTHOOK: query: analyze table part_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@part_hive +POSTHOOK: Output: default@part_hive #### A masked pattern was here #### PREHOOK: query: analyze table partsupp_hive compute statistics PREHOOK: type: QUERY @@ -67,10 +69,12 @@ POSTHOOK: Output: default@partsupp_hive PREHOOK: query: analyze table partsupp_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partsupp_hive +PREHOOK: Output: default@partsupp_hive #### A masked pattern was here #### POSTHOOK: query: analyze table partsupp_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@partsupp_hive +POSTHOOK: Output: default@partsupp_hive #### A masked pattern was here #### PREHOOK: query: analyze table supplier_hive compute statistics PREHOOK: type: QUERY @@ -83,10 +87,12 @@ POSTHOOK: Output: default@supplier_hive PREHOOK: query: analyze table supplier_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@supplier_hive +PREHOOK: Output: default@supplier_hive #### A masked pattern was here #### POSTHOOK: query: analyze table supplier_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@supplier_hive +POSTHOOK: Output: default@supplier_hive #### A masked pattern was here #### Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out index cb02f76..38240ac 100644 --- a/ql/src/test/results/clientpositive/decimal_stats.q.out +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -35,10 +35,12 @@ POSTHOOK: Lineage: decimal_1.v EXPRESSION [] PREHOOK: query: analyze table decimal_1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 #### A masked pattern was here #### POSTHOOK: query: analyze table decimal_1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 #### A masked pattern was here #### PREHOOK: query: desc formatted decimal_1 v PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 4382522..ddb6596 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -75,18 +75,22 @@ amount decimal(10,3) PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 
#### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: explain select s.id, diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index e10edb4..1756317 100644 --- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -62,7 +62,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -105,6 +106,9 @@ STAGE PLANS: Column Types: string, int, float Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -113,7 +117,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -122,7 +127,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -228,13 +234,19 @@ STAGE PLANS: Table: default.uservisits_web_text_none Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ + PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE @@ -300,7 +312,8 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -343,13 +356,18 @@ STAGE PLANS: Column Types: int, double, string, boolean, binary Table: default.empty_tab + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e 
POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: desc formatted empty_tab a PREHOOK: type: DESCTABLE @@ -469,10 +487,12 @@ PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for co PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +PREHOOK: Output: test@uservisits_web_text_none POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword POSTHOOK: type: QUERY POSTHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +POSTHOOK: Output: test@uservisits_web_text_none PREHOOK: query: desc extended UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none diff --git a/ql/src/test/results/clientpositive/distinct_stats.q.out b/ql/src/test/results/clientpositive/distinct_stats.q.out index 73b4add..bc2ab02 100644 --- a/ql/src/test/results/clientpositive/distinct_stats.q.out +++ b/ql/src/test/results/clientpositive/distinct_stats.q.out @@ -19,10 +19,12 @@ POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, co PREHOOK: query: analyze table t1 compute statistics for columns a,b PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns a,b POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: explain select count(distinct b) from t1 group by a diff --git a/ql/src/test/results/clientpositive/drop_table_with_stats.q.out b/ql/src/test/results/clientpositive/drop_table_with_stats.q.out index 52aa10a..119f74a 100644 --- a/ql/src/test/results/clientpositive/drop_table_with_stats.q.out +++ b/ql/src/test/results/clientpositive/drop_table_with_stats.q.out @@ -30,10 +30,12 @@ PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:tblstatsdb1 @@ -54,10 +56,12 @@ PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable1 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable1 PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:tblstatsdb1 @@ -78,10 +82,12 @@ PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable2 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable2 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable2 #### A masked pattern was here 
#### +POSTHOOK: Output: tblstatsdb1@testtable2 PREHOOK: query: DROP TABLE tblstatsdb1.testtable PREHOOK: type: DROPTABLE PREHOOK: Input: tblstatsdb1@testtable @@ -146,10 +152,12 @@ PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: TBLSTATSDB2@TestTable1 @@ -170,10 +178,12 @@ PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable1 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable1 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable1 PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: TBLSTATSDB2@TESTTABLE2 @@ -194,10 +204,12 @@ PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable2 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable2 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable2 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable2 PREHOOK: query: DROP TABLE TBLSTATSDB2.testtable PREHOOK: type: DROPTABLE PREHOOK: Input: tblstatsdb2@testtable diff --git a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out index f256ec1..23517b0 100644 --- a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out +++ b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out @@ -1,22 +1,31 @@ -PREHOOK: query: explain analyze table src compute statistics for columns +PREHOOK: query: create table s like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@s +POSTHOOK: query: create table s like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s +PREHOOK: query: explain analyze table s compute statistics for columns PREHOOK: type: QUERY -POSTHOOK: query: explain analyze table src compute statistics for columns +POSTHOOK: query: explain analyze table s compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: s + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE 
Group By Operator aggregations: compute_stats(key, 16), compute_stats(value, 16) mode: hash @@ -45,13 +54,18 @@ STAGE PLANS: Column Stats Desc: Columns: key, value Column Types: string, string - Table: default.src + Table: default.s -PREHOOK: query: analyze table src compute statistics for columns + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: analyze table s compute statistics for columns PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@s +PREHOOK: Output: default@s #### A masked pattern was here #### -POSTHOOK: query: analyze table src compute statistics for columns +POSTHOOK: query: analyze table s compute statistics for columns POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@s +POSTHOOK: Output: default@s #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index b12d3a1..5f42737 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -33,6 +33,10 @@ PREHOOK: Input: default@date_dim PREHOOK: Input: default@date_dim@d_date_sk=2416945 PREHOOK: Input: default@date_dim@d_date_sk=2416946 PREHOOK: Input: default@date_dim@d_date_sk=2416947 +PREHOOK: Output: default@date_dim +PREHOOK: Output: default@date_dim@d_date_sk=2416945 +PREHOOK: Output: default@date_dim@d_date_sk=2416946 +PREHOOK: Output: default@date_dim@d_date_sk=2416947 #### A masked pattern was here #### POSTHOOK: query: analyze table date_dim partition(d_date_sk) compute statistics for columns POSTHOOK: type: QUERY @@ -40,6 +44,10 @@ POSTHOOK: Input: default@date_dim POSTHOOK: Input: default@date_dim@d_date_sk=2416945 POSTHOOK: Input: default@date_dim@d_date_sk=2416946 POSTHOOK: Input: default@date_dim@d_date_sk=2416947 +POSTHOOK: Output: default@date_dim +POSTHOOK: Output: default@date_dim@d_date_sk=2416945 +POSTHOOK: Output: default@date_dim@d_date_sk=2416946 +POSTHOOK: Output: default@date_dim@d_date_sk=2416947 #### A masked pattern was here #### PREHOOK: query: explain select count(*) from date_dim where d_date > date "1900-01-02" and d_date_sk= 2416945 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out b/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out index 85d7dc4..38854ae 100644 --- a/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out @@ -77,10 +77,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 @@ -193,12 +195,18 @@ PREHOOK: Input: statsdb1@testpart1 PREHOOK: Input: statsdb1@testpart1@part=part1 PREHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testpart1 +PREHOOK: Output: statsdb1@testpart1@part=part1 +PREHOOK: Output: statsdb1@testpart1@part=part2 POSTHOOK: query: 
analyze table testpart1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testpart1 POSTHOOK: Input: statsdb1@testpart1@part=part1 POSTHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testpart1 +POSTHOOK: Output: statsdb1@testpart1@part=part1 +POSTHOOK: Output: statsdb1@testpart1@part=part2 PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 @@ -543,10 +551,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 @@ -659,12 +669,18 @@ PREHOOK: Input: statsdb1@testpart1 PREHOOK: Input: statsdb1@testpart1@part=part1 PREHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testpart1 +PREHOOK: Output: statsdb1@testpart1@part=part1 +PREHOOK: Output: statsdb1@testpart1@part=part2 POSTHOOK: query: analyze table testpart1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testpart1 POSTHOOK: Input: statsdb1@testpart1@part=part1 POSTHOOK: Input: statsdb1@testpart1@part=part2 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testpart1 +POSTHOOK: Output: statsdb1@testpart1@part=part1 +POSTHOOK: Output: statsdb1@testpart1@part=part2 PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart1 diff --git a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out index 80ccddd..bc76dd3 100644 --- a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out +++ b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out @@ -189,10 +189,12 @@ POSTHOOK: Input: default@calendar PREHOOK: query: analyze table calendar compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE @@ -432,10 +434,12 @@ POSTHOOK: Input: default@calendar PREHOOK: query: analyze table calendar compute statistics for columns year PREHOOK: type: QUERY PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns year POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE @@ -565,10 +569,12 @@ POSTHOOK: Input: default@calendar PREHOOK: query: analyze table calendar compute statistics for columns month PREHOOK: type: QUERY PREHOOK: Input: default@calendar 
+PREHOOK: Output: default@calendar #### A masked pattern was here #### POSTHOOK: query: analyze table calendar compute statistics for columns month POSTHOOK: type: QUERY POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar #### A masked pattern was here #### PREHOOK: query: desc formatted calendar PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index 3e28e58..6c5babb 100644 --- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -230,6 +230,12 @@ PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB +PREHOOK: Output: default@partcolstats +PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA +PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB +PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA +PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA +PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB #### A masked pattern was here #### POSTHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns POSTHOOK: type: QUERY @@ -239,6 +245,12 @@ POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB +POSTHOOK: Output: default@partcolstats +POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA +POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB +POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA +POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA +POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB #### A masked pattern was here #### PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index ba14835..98ba6af 100644 --- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -75,18 +75,22 @@ amount decimal(10,3) PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: explain select s.id, diff --git 
a/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out b/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out index c6ab40d..98b2193 100644 --- a/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/drop_partition_with_stats.q.out @@ -42,12 +42,18 @@ PREHOOK: Input: partstatsdb1@testtable PREHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12 PREHOOK: Input: partstatsdb1@testtable@part1=p21/part2=P22 #### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable +PREHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12 +PREHOOK: Output: partstatsdb1@testtable@part1=p21/part2=P22 POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: partstatsdb1@testtable POSTHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12 POSTHOOK: Input: partstatsdb1@testtable@part1=p21/part2=P22 #### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable +POSTHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12 +POSTHOOK: Output: partstatsdb1@testtable@part1=p21/part2=P22 PREHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: partstatsdb1@testtable @@ -110,6 +116,11 @@ PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 PREHOOK: Input: partstatsdb1@testtable1@part1=p21/part2=P22 PREHOOK: Input: partstatsdb1@testtable1@part1=p31/part2=P32 #### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable1 +PREHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P11 +PREHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12 +PREHOOK: Output: partstatsdb1@testtable1@part1=p21/part2=P22 +PREHOOK: Output: partstatsdb1@testtable1@part1=p31/part2=P32 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: partstatsdb1@testtable1 @@ -118,6 +129,11 @@ POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 POSTHOOK: Input: partstatsdb1@testtable1@part1=p21/part2=P22 POSTHOOK: Input: partstatsdb1@testtable1@part1=p31/part2=P32 #### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P11 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p21/part2=P22 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p31/part2=P32 PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: partstatsdb1@testtable1 @@ -172,12 +188,18 @@ PREHOOK: Input: partstatsdb1@testtable2 PREHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12 PREHOOK: Input: partstatsdb1@testtable2@part1=p21/part2=P22 #### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable2 +PREHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12 +PREHOOK: Output: partstatsdb1@testtable2@part1=p21/part2=P22 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: partstatsdb1@testtable2 POSTHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12 POSTHOOK: Input: partstatsdb1@testtable2@part1=p21/part2=P22 #### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable2 +POSTHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12 +POSTHOOK: Output: 
partstatsdb1@testtable2@part1=p21/part2=P22 PREHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: partstatsdb1@testtable2 @@ -288,12 +310,18 @@ PREHOOK: Input: partstatsdb2@testtable PREHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12 PREHOOK: Input: partstatsdb2@testtable@part1=p21/part2=P22 #### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable +PREHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12 +PREHOOK: Output: partstatsdb2@testtable@part1=p21/part2=P22 POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: partstatsdb2@testtable POSTHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12 POSTHOOK: Input: partstatsdb2@testtable@part1=p21/part2=P22 #### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable +POSTHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12 +POSTHOOK: Output: partstatsdb2@testtable@part1=p21/part2=P22 PREHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: partstatsdb2@testtable @@ -356,6 +384,11 @@ PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 PREHOOK: Input: partstatsdb2@testtable1@part1=p21/part2=P22 PREHOOK: Input: partstatsdb2@testtable1@part1=p31/part2=P32 #### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable1 +PREHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P11 +PREHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12 +PREHOOK: Output: partstatsdb2@testtable1@part1=p21/part2=P22 +PREHOOK: Output: partstatsdb2@testtable1@part1=p31/part2=P32 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: partstatsdb2@testtable1 @@ -364,6 +397,11 @@ POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 POSTHOOK: Input: partstatsdb2@testtable1@part1=p21/part2=P22 POSTHOOK: Input: partstatsdb2@testtable1@part1=p31/part2=P32 #### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P11 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p21/part2=P22 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p31/part2=P32 PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: partstatsdb2@testtable1 @@ -418,12 +456,18 @@ PREHOOK: Input: partstatsdb2@testtable2 PREHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12 PREHOOK: Input: partstatsdb2@testtable2@part1=p21/part2=P22 #### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable2 +PREHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12 +PREHOOK: Output: partstatsdb2@testtable2@part1=p21/part2=P22 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: partstatsdb2@testtable2 POSTHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12 POSTHOOK: Input: partstatsdb2@testtable2@part1=p21/part2=P22 #### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable2 +POSTHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12 +POSTHOOK: Output: partstatsdb2@testtable2@part1=p21/part2=P22 PREHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key 
PREHOOK: type: QUERY PREHOOK: Input: partstatsdb2@testtable2 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 67f960a..474d9c1 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -127,10 +127,12 @@ POSTHOOK: Output: default@ss PREHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 PREHOOK: type: QUERY PREHOOK: Input: default@ss +PREHOOK: Output: default@ss #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 POSTHOOK: type: QUERY POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS PREHOOK: type: QUERY @@ -143,10 +145,12 @@ POSTHOOK: Output: default@sr PREHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 PREHOOK: type: QUERY PREHOOK: Input: default@sr +PREHOOK: Output: default@sr #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 POSTHOOK: type: QUERY POSTHOOK: Input: default@sr +POSTHOOK: Output: default@sr #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS PREHOOK: type: QUERY @@ -159,10 +163,12 @@ POSTHOOK: Output: default@cs PREHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 PREHOOK: type: QUERY PREHOOK: Input: default@cs +PREHOOK: Output: default@cs #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 POSTHOOK: type: QUERY POSTHOOK: Input: default@cs +POSTHOOK: Output: default@cs #### A masked pattern was here #### PREHOOK: query: EXPLAIN SELECT x.key, z.value, y.value diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out index f6921f1..c8b9983 100644 --- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -93,7 +93,8 @@ POSTHOOK: query: explain analyze table llap_stats partition (cint) compute stati POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -154,6 +155,9 @@ STAGE PLANS: Column Types: tinyint, smallint Table: default.llap_stats + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: analyze table llap_stats partition (cint) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@llap_stats @@ -167,6 +171,17 @@ PREHOOK: Input: default@llap_stats@cint=-9566 PREHOOK: Input: default@llap_stats@cint=15007 PREHOOK: Input: default@llap_stats@cint=4963 PREHOOK: Input: default@llap_stats@cint=7021 +PREHOOK: Output: default@llap_stats +PREHOOK: Output: default@llap_stats@cint=-13326 +PREHOOK: Output: default@llap_stats@cint=-15431 +PREHOOK: Output: default@llap_stats@cint=-15549 +PREHOOK: Output: default@llap_stats@cint=-15813 +PREHOOK: Output: default@llap_stats@cint=-4213 +PREHOOK: Output: default@llap_stats@cint=-7824 +PREHOOK: Output: default@llap_stats@cint=-9566 +PREHOOK: Output: default@llap_stats@cint=15007 +PREHOOK: Output: default@llap_stats@cint=4963 +PREHOOK: Output: default@llap_stats@cint=7021 #### A masked pattern was here #### POSTHOOK: query: analyze 
table llap_stats partition (cint) compute statistics for columns POSTHOOK: type: QUERY @@ -181,6 +196,17 @@ POSTHOOK: Input: default@llap_stats@cint=-9566 POSTHOOK: Input: default@llap_stats@cint=15007 POSTHOOK: Input: default@llap_stats@cint=4963 POSTHOOK: Input: default@llap_stats@cint=7021 +POSTHOOK: Output: default@llap_stats +POSTHOOK: Output: default@llap_stats@cint=-13326 +POSTHOOK: Output: default@llap_stats@cint=-15431 +POSTHOOK: Output: default@llap_stats@cint=-15549 +POSTHOOK: Output: default@llap_stats@cint=-15813 +POSTHOOK: Output: default@llap_stats@cint=-4213 +POSTHOOK: Output: default@llap_stats@cint=-7824 +POSTHOOK: Output: default@llap_stats@cint=-9566 +POSTHOOK: Output: default@llap_stats@cint=15007 +POSTHOOK: Output: default@llap_stats@cint=4963 +POSTHOOK: Output: default@llap_stats@cint=7021 #### A masked pattern was here #### PREHOOK: query: DROP TABLE llap_stats PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/llap/llapdecider.q.out b/ql/src/test/results/clientpositive/llap/llapdecider.q.out index d514f42..69312cd 100644 --- a/ql/src/test/results/clientpositive/llap/llapdecider.q.out +++ b/ql/src/test/results/clientpositive/llap/llapdecider.q.out @@ -230,10 +230,12 @@ STAGE PLANS: PREHOOK: query: analyze table src_orc compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@src_orc +PREHOOK: Output: default@src_orc #### A masked pattern was here #### POSTHOOK: query: analyze table src_orc compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@src_orc +POSTHOOK: Output: default@src_orc #### A masked pattern was here #### PREHOOK: query: EXPLAIN SELECT key, count(value) as cnt FROM src_orc GROUP BY key ORDER BY cnt PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out index c8190bd..6385619 100644 --- a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out @@ -427,10 +427,12 @@ STAGE PLANS: PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl +PREHOOK: Output: default@stats_tbl #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl +POSTHOOK: Output: default@stats_tbl #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out index be8ca4a..3a83fb9 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_stats.q.out @@ -37,12 +37,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@partitioned1 PREHOOK: Input: default@partitioned1@part=1 PREHOOK: Input: default@partitioned1@part=2 +PREHOOK: Output: default@partitioned1 +PREHOOK: Output: default@partitioned1@part=1 +PREHOOK: Output: default@partitioned1@part=2 #### A masked pattern was here #### POSTHOOK: query: analyze table partitioned1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@partitioned1 POSTHOOK: Input: default@partitioned1@part=1 POSTHOOK: Input: 
default@partitioned1@part=2 +POSTHOOK: Output: default@partitioned1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Output: default@partitioned1@part=2 #### A masked pattern was here #### PREHOOK: query: desc formatted partitioned1 PREHOOK: type: DESCTABLE @@ -231,12 +237,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@partitioned1 PREHOOK: Input: default@partitioned1@part=1 PREHOOK: Input: default@partitioned1@part=2 +PREHOOK: Output: default@partitioned1 +PREHOOK: Output: default@partitioned1@part=1 +PREHOOK: Output: default@partitioned1@part=2 #### A masked pattern was here #### POSTHOOK: query: analyze table partitioned1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@partitioned1 POSTHOOK: Input: default@partitioned1@part=1 POSTHOOK: Input: default@partitioned1@part=2 +POSTHOOK: Output: default@partitioned1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Output: default@partitioned1@part=2 #### A masked pattern was here #### PREHOOK: query: desc formatted partitioned1 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out index d01b373..f6f3aac 100644 --- a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out +++ b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_1.q.out @@ -162,11 +162,15 @@ PREHOOK: query: analyze table `c/b/o_t1` compute statistics for columns key, val PREHOOK: type: QUERY PREHOOK: Input: default@c/b/o_t1 PREHOOK: Input: default@c/b/o_t1@dt=2014 +PREHOOK: Output: default@c/b/o_t1 +PREHOOK: Output: default@c/b/o_t1@dt=2014 #### A masked pattern was here #### POSTHOOK: query: analyze table `c/b/o_t1` compute statistics for columns key, value, c_int, c_float, c_boolean POSTHOOK: type: QUERY POSTHOOK: Input: default@c/b/o_t1 POSTHOOK: Input: default@c/b/o_t1@dt=2014 +POSTHOOK: Output: default@c/b/o_t1 +POSTHOOK: Output: default@c/b/o_t1@dt=2014 #### A masked pattern was here #### PREHOOK: query: analyze table `//cbo_t2` partition (dt) compute statistics PREHOOK: type: QUERY @@ -184,11 +188,15 @@ PREHOOK: query: analyze table `//cbo_t2` compute statistics for columns key, val PREHOOK: type: QUERY PREHOOK: Input: default@//cbo_t2 PREHOOK: Input: default@//cbo_t2@dt=2014 +PREHOOK: Output: default@//cbo_t2 +PREHOOK: Output: default@//cbo_t2@dt=2014 #### A masked pattern was here #### POSTHOOK: query: analyze table `//cbo_t2` compute statistics for columns key, value, c_int, c_float, c_boolean POSTHOOK: type: QUERY POSTHOOK: Input: default@//cbo_t2 POSTHOOK: Input: default@//cbo_t2@dt=2014 +POSTHOOK: Output: default@//cbo_t2 +POSTHOOK: Output: default@//cbo_t2@dt=2014 #### A masked pattern was here #### PREHOOK: query: analyze table `cbo_/t3////` compute statistics PREHOOK: type: QUERY @@ -201,10 +209,12 @@ POSTHOOK: Output: default@cbo_/t3//// PREHOOK: query: analyze table `cbo_/t3////` compute statistics for columns key, value, c_int, c_float, c_boolean PREHOOK: type: QUERY PREHOOK: Input: default@cbo_/t3//// +PREHOOK: Output: default@cbo_/t3//// #### A masked pattern was here #### POSTHOOK: query: analyze table `cbo_/t3////` compute statistics for columns key, value, c_int, c_float, c_boolean POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_/t3//// +POSTHOOK: Output: default@cbo_/t3//// #### A masked pattern was here #### PREHOOK: query: analyze table `src/_/cbo` compute statistics PREHOOK: type: QUERY @@ -217,10 +227,12 @@ POSTHOOK: 
Output: default@src/_/cbo PREHOOK: query: analyze table `src/_/cbo` compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@src/_/cbo +PREHOOK: Output: default@src/_/cbo #### A masked pattern was here #### POSTHOOK: query: analyze table `src/_/cbo` compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@src/_/cbo +POSTHOOK: Output: default@src/_/cbo #### A masked pattern was here #### PREHOOK: query: analyze table `p/a/r/t` compute statistics PREHOOK: type: QUERY @@ -233,10 +245,12 @@ POSTHOOK: Output: default@p/a/r/t PREHOOK: query: analyze table `p/a/r/t` compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@p/a/r/t +PREHOOK: Output: default@p/a/r/t #### A masked pattern was here #### POSTHOOK: query: analyze table `p/a/r/t` compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@p/a/r/t +POSTHOOK: Output: default@p/a/r/t #### A masked pattern was here #### PREHOOK: query: analyze table `line/item` compute statistics PREHOOK: type: QUERY @@ -249,10 +263,12 @@ POSTHOOK: Output: default@line/item PREHOOK: query: analyze table `line/item` compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@line/item +PREHOOK: Output: default@line/item #### A masked pattern was here #### POSTHOOK: query: analyze table `line/item` compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@line/item +POSTHOOK: Output: default@line/item #### A masked pattern was here #### PREHOOK: query: select key, (c_int+1)+2 as x, sum(c_int) from `c/b/o_t1` group by c_float, `c/b/o_t1`.c_int, key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out index c905ceb..86163fe 100644 --- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out @@ -189,10 +189,12 @@ STAGE PLANS: PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d PREHOOK: type: QUERY PREHOOK: Input: default@stats_null +PREHOOK: Output: default@stats_null #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null +POSTHOOK: Output: default@stats_null #### A masked pattern was here #### PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d PREHOOK: type: QUERY @@ -384,12 +386,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part PREHOOK: Input: default@stats_null_part@dt=1 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@stats_null_part +PREHOOK: Output: default@stats_null_part@dt=1 +PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null_part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part POSTHOOK: Input: default@stats_null_part@dt=1 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@stats_null_part +POSTHOOK: Output: default@stats_null_part@dt=1 +POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out 
b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out index 18b9aa5..67fef54 100644 --- a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out +++ b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out @@ -103,18 +103,22 @@ POSTHOOK: Lineage: inputtbl3.val SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema( PREHOOK: query: analyze table inputTbl1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@inputtbl1 +PREHOOK: Output: default@inputtbl1 #### A masked pattern was here #### POSTHOOK: query: analyze table inputTbl1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl1 +POSTHOOK: Output: default@inputtbl1 #### A masked pattern was here #### PREHOOK: query: analyze table inputTbl3 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@inputtbl3 +PREHOOK: Output: default@inputtbl3 #### A masked pattern was here #### POSTHOOK: query: analyze table inputTbl3 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl3 +POSTHOOK: Output: default@inputtbl3 #### A masked pattern was here #### PREHOOK: query: explain SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 @@ -282,10 +286,12 @@ POSTHOOK: Input: default@inputtbl3 PREHOOK: query: analyze table inputTbl2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@inputtbl2 +PREHOOK: Output: default@inputtbl2 #### A masked pattern was here #### POSTHOOK: query: analyze table inputTbl2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl2 +POSTHOOK: Output: default@inputtbl2 #### A masked pattern was here #### PREHOOK: query: explain SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 2ea8c8f..1a1f79d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index 2077dce..f76c301 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: 
query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out index dbbfd34..f8d1ec2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out index ffce9e6..a55250b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out @@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b +PREHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b +POSTHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out index 4f25253..680ee42 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join5.q.out @@ -27,10 +27,12 @@ POSTHOOK: Output: default@sorted_mod_4 PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@sorted_mod_4 +PREHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@sorted_mod_4 +POSTHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 @@ -57,10 +59,12 @@ POSTHOOK: Output: default@small_table PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table +PREHOOK: Output: default@small_table #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table +POSTHOOK: Output: default@small_table #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* @@ -267,10 +271,12 @@ POSTHOOK: Output: default@mod_8_mod_4 PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 
+PREHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 +POSTHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 @@ -297,10 +303,12 @@ POSTHOOK: Output: default@small_table2 PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table2 +PREHOOK: Output: default@small_table2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 +POSTHOOK: Output: default@small_table2 #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index c9eec63..b7496fc 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -1632,10 +1632,12 @@ POSTHOOK: Output: default@dsrv_small PREHOOK: query: analyze table dsrv_small compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@dsrv_small +PREHOOK: Output: default@dsrv_small #### A masked pattern was here #### POSTHOOK: query: analyze table dsrv_small compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@dsrv_small +POSTHOOK: Output: default@dsrv_small #### A masked pattern was here #### PREHOOK: query: EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 062fef6..a147084 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -73,18 +73,22 @@ POSTHOOK: Output: default@dsrv2_small PREHOOK: query: analyze table dsrv2_big compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@dsrv2_big +PREHOOK: Output: default@dsrv2_big #### A masked pattern was here #### POSTHOOK: query: analyze table dsrv2_big compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@dsrv2_big +POSTHOOK: Output: default@dsrv2_big #### A masked pattern was here #### PREHOOK: query: analyze table dsrv2_small compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@dsrv2_small +PREHOOK: Output: default@dsrv2_small #### A masked pattern was here #### POSTHOOK: query: analyze table dsrv2_small compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@dsrv2_small +POSTHOOK: Output: default@dsrv2_small #### A masked pattern was here #### PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 57b59dd..887f6d5 100644 --- 
a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -387,10 +387,12 @@ STAGE PLANS: PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl +PREHOOK: Output: default@stats_tbl #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl +POSTHOOK: Output: default@stats_tbl #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out index 59b52b0..4cfab18 100644 --- a/ql/src/test/results/clientpositive/partial_column_stats.q.out +++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out @@ -12,7 +12,8 @@ POSTHOOK: query: explain analyze table t1 compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -55,13 +56,18 @@ STAGE PLANS: Column Types: int, string Table: default.t1 + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: analyze table t1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: desc formatted t1 value PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out index c422a3d..7794e7f 100644 --- a/ql/src/test/results/clientpositive/pcs.q.out +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -70,6 +70,10 @@ PREHOOK: Input: default@pcs_t1 PREHOOK: Input: default@pcs_t1@ds=2000-04-08 PREHOOK: Input: default@pcs_t1@ds=2000-04-09 PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +PREHOOK: Output: default@pcs_t1 +PREHOOK: Output: default@pcs_t1@ds=2000-04-08 +PREHOOK: Output: default@pcs_t1@ds=2000-04-09 +PREHOOK: Output: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics for columns POSTHOOK: type: QUERY @@ -77,6 +81,10 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +POSTHOOK: Output: default@pcs_t1 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### PREHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out index 8fe688d..e33cc07 100644 --- a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out +++ b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out @@ -55,10 +55,12 @@ POSTHOOK: 
Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc where locid < 30 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out index c025cfa..2f13760 100644 --- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out +++ b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out @@ -47,10 +47,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 @@ -187,10 +189,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 31ccc5c..a93ae48 100644 --- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -97,26 +97,32 @@ POSTHOOK: Output: default@loc PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid PREHOOK: type: QUERY PREHOOK: Input: default@emp +PREHOOK: Output: default@emp #### A masked pattern was here #### POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp #### A masked pattern was here #### PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@dept +PREHOOK: Output: default@dept #### A masked pattern was here #### POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept #### A masked pattern was here #### PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc +PREHOOK: Output: default@loc #### A masked pattern was here #### POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: 
default@loc +POSTHOOK: Output: default@loc #### A masked pattern was here #### PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out index cebc342..1fd98fa 100644 --- a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out +++ b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out @@ -25,10 +25,12 @@ POSTHOOK: Output: default@dec PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value PREHOOK: type: QUERY PREHOOK: Input: default@dec +PREHOOK: Output: default@dec #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value POSTHOOK: type: QUERY POSTHOOK: Input: default@dec +POSTHOOK: Output: default@dec #### A masked pattern was here #### PREHOOK: query: DESC FORMATTED dec value PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index c81240d..1b9fd14 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -369,21 +369,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double) outputColumnNames: cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: cdouble (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reducer 2 Reduce Operator Tree: @@ -391,13 +391,13 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -458,40 +458,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -552,40 +552,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) outputColumnNames: ctinyint, cdouble - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 
1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -646,22 +646,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) outputColumnNames: ctinyint, cstring1, cstring2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reducer 2 Reduce Operator Tree: @@ -670,13 +670,13 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 543d0ef..5d2eaa0 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -411,10 +411,12 @@ STAGE PLANS: PREHOOK: query: analyze table stats_tbl 
compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl +PREHOOK: Output: default@stats_tbl #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl +POSTHOOK: Output: default@stats_tbl #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out index 359eea3..c439958 100644 --- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out @@ -181,10 +181,12 @@ STAGE PLANS: PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d PREHOOK: type: QUERY PREHOOK: Input: default@stats_null +PREHOOK: Output: default@stats_null #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null +POSTHOOK: Output: default@stats_null #### A masked pattern was here #### PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d PREHOOK: type: QUERY @@ -376,12 +378,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part PREHOOK: Input: default@stats_null_part@dt=1 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@stats_null_part +PREHOOK: Output: default@stats_null_part@dt=1 +PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null_part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part POSTHOOK: Input: default@stats_null_part@dt=1 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@stats_null_part +POSTHOOK: Output: default@stats_null_part@dt=1 +POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out index b49462a..00f5292 100644 --- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out @@ -23,7 +23,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -33,7 +33,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (ctinyint > 0) (type: boolean) - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) 
(type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -42,19 +42,19 @@ STAGE PLANS: native: true projectedOutputColumns: [13, 6, 2, 16] selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string - Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -140,7 +140,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] diff --git a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 91af229..f171405 100644 --- a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -34,11 +34,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cd - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col1 (type: int) @@ -58,11 +58,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: tinyint) @@ -89,11 +89,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -103,7 +103,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -112,7 +112,7 @@ STAGE PLANS: 1 _col0 (type: tinyint) input vertices: 1 Map 4 - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index ca4ce15..8135ca4 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY @@ -244,7 +246,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -255,7 +257,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -288,7 +290,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -299,7 +301,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -319,13 +321,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 - Statistics: Num rows: 16 Data size: 4403 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -417,7 +419,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -428,7 +430,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -461,7 +463,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -472,7 +474,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -490,13 +492,13 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 2 - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -679,7 +681,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -690,7 +692,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -718,7 +720,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: 
COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -729,7 +731,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -764,7 +766,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -775,7 +777,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2] - Statistics: Num rows: 15 Data size: 4003 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -793,7 +795,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 16 Data size: 4403 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -811,7 +813,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 17 Data size: 4843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 4214 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 8a9f90f..be45cdf 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY @@ -260,7 +262,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cd - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -271,7 +273,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -299,7 +301,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hd - Statistics: Num 
rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -310,7 +312,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -345,7 +347,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -356,7 +358,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3] - Statistics: Num rows: 20 Data size: 5237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -374,7 +376,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4874 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 @@ -392,7 +394,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 24 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5361 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col1) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index dbbfd34..f8d1ec2 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index ffce9e6..a55250b 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b +PREHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR 
COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b +POSTHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 4f25253..680ee42 100644 --- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -27,10 +27,12 @@ POSTHOOK: Output: default@sorted_mod_4 PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@sorted_mod_4 +PREHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@sorted_mod_4 +POSTHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 @@ -57,10 +59,12 @@ POSTHOOK: Output: default@small_table PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table +PREHOOK: Output: default@small_table #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table +POSTHOOK: Output: default@small_table #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* @@ -267,10 +271,12 @@ POSTHOOK: Output: default@mod_8_mod_4 PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 +PREHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 +POSTHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 @@ -297,10 +303,12 @@ POSTHOOK: Output: default@small_table2 PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table2 +PREHOOK: Output: default@small_table2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 +POSTHOOK: Output: default@small_table2 #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 5086f53..f4f5f54 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ 
-45,7 +45,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() Group By Vectorization: @@ -190,7 +190,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -201,7 +201,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctinyint) Group By Vectorization: @@ -355,11 +355,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) mode: hash @@ -488,7 +488,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -499,7 +499,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cbigint), max(cbigint), count(cbigint), count() Group By Vectorization: @@ -644,7 +644,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -655,7 +655,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cbigint) Group By Vectorization: @@ -809,11 +809,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE 
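The repeated jump from "Data size: 377237" to "Data size: 2641964" in these plans is the visible effect of the patch: the column-stats scan now also gathers basic table stats, so alltypesorc carries a real rawDataSize (see the "rawDataSize 0 -> 2641964" hunks further down in this file) and the planner no longer falls back to the much smaller pre-patch figure. Downstream operators then scale that base size in proportion to their row estimates. The sketch below replays that arithmetic using only numbers read off the plans above; the class and method names are illustrative, not Hive's actual stats classes.

```java
// Illustrative only: reproduces the Data size arithmetic visible in the
// annotated plans above; this is not Hive source code.
public final class StatsScalingSketch {
    // Scale a data size to a new row estimate (floor, as the plans suggest).
    static long scale(long dataSize, long numRows, long newNumRows) {
        return (long) (dataSize * (double) newNumRows / numRows);
    }

    public static void main(String[] args) {
        long rows = 12288L, rawDataSize = 2641964L; // alltypesorc, per this patch
        // Filter estimated at 75% selectivity: 12288 -> 9216 rows.
        System.out.println(scale(rawDataSize, rows, 9216L)); // 1981473
        // mergepartial Group By halves the estimate: 12288 -> 6144 rows.
        System.out.println(scale(rawDataSize, rows, 6144L)); // 1320982
    }
}
```

The pre-patch column scales identically (377237 × 0.75 = 282927 for the 9216-row filter), which suggests only the base raw data size changed, not the estimator itself.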
Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) mode: hash @@ -942,7 +942,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -953,7 +953,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [4] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(cfloat), max(cfloat), count(cfloat), count() Group By Vectorization: @@ -1098,7 +1098,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1109,7 +1109,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [4] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(cfloat) Group By Vectorization: @@ -1263,11 +1263,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) mode: hash @@ -1434,7 +1434,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -1444,7 +1444,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, 
FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) outputColumnNames: cbigint, cfloat, ctinyint @@ -1452,7 +1452,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3, 4, 0] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) Group By Vectorization: @@ -1591,14 +1591,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1631,7 +1631,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1652,7 +1652,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30399,22 +30399,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30451,7 +30451,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30472,7 +30472,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30513,22 +30513,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30565,7 +30565,7 
@@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30586,7 +30586,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30627,22 +30627,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30679,7 +30679,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30700,7 +30700,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30742,24 +30742,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -30783,7 +30783,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30804,7 +30804,7 @@ STAGE PLANS: name default.alltypesorc numFiles 1 numRows 12288 - rawDataSize 0 + rawDataSize 2641964 serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -30824,16 +30824,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -30844,13 +30844,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE #### A masked 
pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out index a6ef031..00c9328 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -94,7 +94,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 @@ -102,7 +102,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10, 0, 8, 4, 6] - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) Group By Vectorization: @@ -116,12 +116,12 @@ STAGE PLANS: keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 
(type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -144,15 +144,15 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -171,19 +171,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,7 +389,7 @@ 
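Two more recurring deltas in these golden files decode the same way. Limit reprices from the average row width (2641964 / 12288 = 215 bytes, hence "Num rows: 40 Data size: 8600" just above), and each outer map join appears to inflate the child estimate by 10% (12288 -> 13516 -> 14867 rows in the earlier hunks, and likewise 15 -> 16 -> 17 on the small tables). A quick consistency check of both patterns, again illustrative rather than Hive source:

```java
// Illustrative arithmetic for the LIMIT and outer-join annotations in this
// diff; a consistency check of the numbers, not Hive's estimator code.
public final class JoinLimitEstimates {
    static long inflateTenPercent(long v) { return (long) (v * 1.1); }

    public static void main(String[] args) {
        // LIMIT: average row width times the limit.
        long avgRow = 2641964L / 12288L;               // 215 bytes
        System.out.println(avgRow * 40);               // 8600, as in "Limit 40" above
        // Outer map join: +10% per join step.
        System.out.println(inflateTenPercent(12288L));   // 13516
        System.out.println(inflateTenPercent(13516L));   // 14867
        System.out.println(inflateTenPercent(2641964L)); // 2906160
    }
}
```

The half-size steps at the mergepartial Group By operators (2730 -> 1365 rows, 586959 -> 293479 bytes just above) fit the same proportional rule with floor rounding.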
STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -399,7 +399,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 @@ -407,7 +407,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10, 0, 8, 4, 6] - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) Group By Vectorization: @@ -421,12 +421,12 @@ STAGE PLANS: keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -449,15 +449,15 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -476,19 +476,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 1541908..d1ef666 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -84,25 +84,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num 
rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean) - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) - Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Map Vectorization: @@ -125,15 +125,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num 
rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized @@ -148,10 +148,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 1d925c5..8d96d0d 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -80,25 +80,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) outputColumnNames: cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, 
        ctimestamp1
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint)
        keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
        mode: hash
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
        sort order: +++++++
        Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct)
    Execution mode: vectorized
    Map Vectorization:
@@ -121,15 +121,15 @@ STAGE PLANS:
        keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-       Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
-       Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
        sort order: +++++++
-       Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
    Reducer 3
    Execution mode: vectorized
@@ -144,10 +144,10 @@ STAGE PLANS:
      Select Operator
        expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
-       Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
-       Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index e731c2d..d7fade1 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -57,25 +57,25 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Filter Operator
        predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
        outputColumnNames: cdouble, cstring1, ctimestamp1
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
        keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
        mode: hash
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
        sort order: +++
        Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
    Execution mode: vectorized
    Map Vectorization:
@@ -98,14 +98,14 @@ STAGE PLANS:
        keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-       Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-       Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
-       Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
index a8f401b..c5060eb 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
@@ -65,18 +65,18 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Filter Operator
        predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean)
-       Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-       Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col5 (type: bigint), _col0 (type: float)
        sort order: ++
-       Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
    Execution mode: vectorized
    Map Vectorization:
@@ -100,10 +100,10 @@ STAGE PLANS:
      Select Operator
        expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-       Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
-       Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index e731c2d..d7fade1 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -57,25 +57,25 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Filter Operator
        predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
        outputColumnNames: cdouble, cstring1, ctimestamp1
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
        keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
        mode: hash
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
        sort order: +++
        Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
    Execution mode: vectorized
    Map Vectorization:
@@ -98,14 +98,14 @@ STAGE PLANS:
        keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-       Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-       Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
-       Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
index baea88f..eb1578a 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -33,19 +33,19 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [12]
        selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 100
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -201,7 +201,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -211,7 +211,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean
        predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
        outputColumnNames: _col0, _col1, _col2
@@ -220,7 +220,7 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [12, 15, 17]
        selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21)
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: bigint), _col1 (type: double)
        sort order: ++
@@ -229,7 +229,7 @@ STAGE PLANS:
        native: false
        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
        nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
        value expressions: _col2 (type: decimal(22,21))
    Execution mode: vectorized
@@ -258,19 +258,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 1, 2]
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 100
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -419,7 +419,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -429,7 +429,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean
        predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean)
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
        outputColumnNames: _col0, _col1, _col2, _col4, _col5
@@ -438,7 +438,7 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [12, 15, 16, 14, 17]
        selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: double), _col1 (type: double)
        sort order: ++
@@ -447,7 +447,7 @@ STAGE PLANS:
        native: false
        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
        nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
        value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
    Execution mode: vectorized
@@ -476,19 +476,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 1, 2, 1, 3, 4]
-       Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 100
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
index 1f1bb30..4e0916d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
@@ -23,14 +23,14 @@ STAGE PLANS:
      TableScan
        alias: alltypesorc
        filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Filter Operator
        predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cbigint (type: bigint)
        outputColumnNames: cbigint
-       Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: avg(cbigint)
        mode: hash
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 47664ce..5be9c00 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -101,7 +101,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) -> boolean) -> boolean
        predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean)
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
        outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint
@@ -109,7 +109,7 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [2, 5, 1, 4, 0]
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
        Group By Vectorization:
@@ -329,7 +329,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -339,7 +339,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean
        predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
-       Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint)
        outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint
@@ -347,7 +347,7 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [2, 3, 1, 5, 0]
-       Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
        Group By Vectorization:
@@ -559,7 +559,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -569,7 +569,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean
        predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean)
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double)
        outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble
@@ -577,7 +577,7 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [3, 0, 1, 2, 5]
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
        Group By Vectorization:
@@ -768,7 +768,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -778,7 +778,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean
        predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean)
-       Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float)
        outputColumnNames: ctinyint, cbigint, cint, cfloat
@@ -786,7 +786,7 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 3, 2, 4]
-       Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
        Group By Vectorization:
@@ -985,7 +985,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -995,7 +995,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean) -> boolean) -> boolean
        predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean)
-       Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7))
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
@@ -1004,7 +1004,7 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30]
        selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(9,7)
-       Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7))
        sort order: +++++++++++++++++++++++
@@ -1013,7 +1013,7 @@ STAGE PLANS:
        native: false
        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
        nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
-       Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
    Execution mode: vectorized
    Map Vectorization:
@@ -1041,19 +1041,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-       Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 50
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1282,7 +1282,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1292,7 +1292,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean
        predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean)
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -1301,7 +1301,7 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28]
        selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint)
        sort order: +++++++++++++++++++++++++
@@ -1310,7 +1310,7 @@ STAGE PLANS:
        native: false
        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
        nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
    Execution mode: vectorized
    Map Vectorization:
@@ -1338,19 +1338,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21]
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 25
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1528,7 +1528,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1538,7 +1538,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> boolean) -> boolean
        predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean)
-       Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
@@ -1547,7 +1547,7 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28]
        selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(8,7), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long
-       Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
        sort order: +++++++++++++++++++++++
@@ -1556,7 +1556,7 @@ STAGE PLANS:
        native: false
        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
        nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
-       Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
        value expressions: _col2 (type: boolean)
    Execution mode: vectorized
@@ -1585,19 +1585,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-       Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 75
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1832,7 +1832,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1842,7 +1842,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean
        predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean)
-       Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
@@ -1851,7 +1851,7 @@ STAGE PLANS:
        native: true
        projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24]
        selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double
-       Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double)
        sort order: +++++++++++++++
@@ -1860,7 +1860,7 @@ STAGE PLANS:
        native: false
        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
        nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
-       Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
        value expressions: _col0 (type: timestamp)
    Execution mode: vectorized
@@ -1889,19 +1889,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14]
-       Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 45
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2078,7 +2078,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -2088,7 +2088,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean
        predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean)
-       Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint)
        outputColumnNames: csmallint, cbigint, ctinyint
@@ -2096,7 +2096,7 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [1, 3, 0]
-       Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count()
        Group By Vectorization:
@@ -2110,12 +2110,12 @@ STAGE PLANS:
        keys: csmallint (type: smallint)
        mode: hash
        outputColumnNames: _col0, _col1, _col2, _col3, _col4
-       Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: smallint)
        sort order: +
        Map-reduce partition columns: _col0 (type: smallint)
-       Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint)
    Execution mode: vectorized
    Map Vectorization:
@@ -2138,15 +2138,15 @@ STAGE PLANS:
        keys: KEY._col0 (type: smallint)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3, _col4
-       Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-       Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
        sort order: +++++++++++
-       Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
        TopN Hash Memory Usage: 0.1
    Reducer 3
    Execution mode: vectorized
@@ -2165,19 +2165,19 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-       Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
      Limit
        Number of rows: 20
        Limit Vectorization:
          className: VectorLimitOperator
          native: true
-       Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2330,7 +2330,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -2340,7 +2340,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean
        predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean)
-       Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: cdouble (type: double), cfloat (type: float)
        outputColumnNames: cdouble, cfloat
@@ -2348,7 +2348,7 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [5, 4]
-       Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
      Group By Operator
        aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble)
        Group By Vectorization:
@@ -2362,12 +2362,12 @@ STAGE PLANS:
        keys: cdouble (type: double)
        mode: hash
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-       Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: double)
        sort order: +
        Map-reduce partition columns: _col0 (type: double)
-       Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double)
    Execution mode: vectorized
    Map Vectorization:
@@ -2390,15 +2390,15 @@ STAGE PLANS:
        keys: KEY._col0 (type: double)
        mode: mergepartial
        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-       Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
      Select Operator
        expressions: _col0 (type: double), _col1 (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double)
        outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-       Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
      Reduce Output Operator
        key expressions: _col0 (type: double)
        sort order: +
-       Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
        value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
    Reducer 3
    Execution mode: vectorized
@@ -2417,13 +2417,13 @@ STAGE PLANS:
        className: VectorSelectOperator
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13]
-       Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
      File Output Operator
        compressed: false
        File Sink Vectorization:
          className: VectorFileSinkOperator
          native: false
-       Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
        table:
          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2626,7 +2626,7 @@ STAGE PLANS:
    Map Operator Tree:
      TableScan
        alias: alltypesorc
-       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
      TableScan Vectorization:
        native: true
        projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -2636,7 +2636,7 @@ STAGE PLANS:
        native: true
        predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, FilterDoubleColEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean
        predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2
like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double) outputColumnNames: ctimestamp1, cstring1, cint, csmallint, ctinyint, cfloat, cdouble @@ -2644,7 +2644,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [8, 6, 2, 1, 0, 4, 5] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) Group By Vectorization: @@ -2658,12 +2658,12 @@ STAGE PLANS: keys: ctimestamp1 (type: timestamp), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -2686,15 +2686,15 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * 
UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) sort order: +++++++++++++++++++++++++++++++++++++++ - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -2713,19 +2713,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38] - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3003,7 +3003,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -3013,7 +3013,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint) outputColumnNames: cboolean1, cfloat, cbigint, cint, cdouble, ctinyint, csmallint @@ -3021,7 +3021,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10, 4, 3, 2, 5, 0, 1] - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) Group By Vectorization: @@ -3035,12 +3035,12 @@ STAGE PLANS: keys: cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), 
_col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Map Vectorization: @@ -3063,15 +3063,15 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: boolean), _col1 (type: float), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (- _col1) (type: float), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double) outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 Execution mode: vectorized @@ -3090,13 +3090,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24] - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 67ed3dd..b99af0f 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -61,7 +61,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -70,13 +70,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 14, 15] selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -192,7 +192,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -202,7 +202,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) - Statistics: Num rows: 12288 Data 
size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -211,13 +211,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 14, 15] selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 030a71b..c69fcab 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -34,7 +34,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -42,7 +42,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator Spark Hash Table Sink Vectorization: className: VectorSparkHashTableSinkOperator @@ -72,7 +72,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -82,7 +82,7 @@ STAGE PLANS: native: true predicateExpression: 
SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -90,7 +90,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -104,7 +104,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -113,7 +113,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 2, 12] selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out index 90ef576..3780a4a 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out @@ -119,7 +119,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -129,7 +129,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean) - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: 
string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble) (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40 @@ -138,13 +138,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 30:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:string, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:string, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: DoubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double - Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num 
rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 996021f..6760e51 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -20,14 +20,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: tinyint) @@ -47,14 +47,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v3 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: smallint) @@ -81,14 +81,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ctinyint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -98,11 +98,11 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: smallint), _col3 (type: double) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -112,7 +112,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) mode: hash diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 5930057..fed9902 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -37,7 +37,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -45,7 +45,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -55,7 +55,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -69,7 +69,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] @@ -79,7 +79,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 @@ -87,7 +87,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -97,7 +97,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -121,11 +121,11 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) mode: hash diff --git a/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out index 26aab1c..b88eda5 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out @@ -59,17 +59,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean) - Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out b/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out index 8cb04c7..cb53f86 100644 --- a/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out +++ b/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out @@ -29,10 +29,12 @@ POSTHOOK: Output: default@s/c PREHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value PREHOOK: type: QUERY PREHOOK: Input: default@s/c +PREHOOK: Output: default@s/c #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@s/c +POSTHOOK: Output: default@s/c #### A masked pattern was here #### PREHOOK: query: SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/stats_invalidation.q.out b/ql/src/test/results/clientpositive/stats_invalidation.q.out index a0e7663..a98d98e 100644 --- a/ql/src/test/results/clientpositive/stats_invalidation.q.out +++ b/ql/src/test/results/clientpositive/stats_invalidation.q.out @@ -21,10 +21,12 @@ POSTHOOK: Lineage: stats_invalid.value SIMPLE [(src)src.FieldSchema(name:value, PREHOOK: query: analyze table stats_invalid compute statistics for columns key,value PREHOOK: type: QUERY PREHOOK: Input: default@stats_invalid +PREHOOK: Output: default@stats_invalid #### A masked pattern was here #### POSTHOOK: query: analyze table stats_invalid compute statistics for columns key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_invalid +POSTHOOK: Output: default@stats_invalid #### A masked pattern was here #### PREHOOK: query: desc formatted stats_invalid PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/stats_missing_warning.q.out b/ql/src/test/results/clientpositive/stats_missing_warning.q.out index 0ed70a0..b905785 100644 --- a/ql/src/test/results/clientpositive/stats_missing_warning.q.out +++ b/ql/src/test/results/clientpositive/stats_missing_warning.q.out @@ -117,26 +117,32 @@ POSTHOOK: Input: default@missing_stats_t3 PREHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t1 +PREHOOK: Output: default@missing_stats_t1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t1 +POSTHOOK: Output: default@missing_stats_t1 #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t2 +PREHOOK: Output: default@missing_stats_t2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t2 +POSTHOOK: Output: default@missing_stats_t2 #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t3 +PREHOOK: Output: default@missing_stats_t3 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t3 +POSTHOOK: Output: default@missing_stats_t3 #### A masked pattern was here #### 
PREHOOK: query: SELECT COUNT(*) FROM missing_stats_t1 t1 diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 88c2114..97947ba 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -169,10 +169,12 @@ STAGE PLANS: PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d PREHOOK: type: QUERY PREHOOK: Input: default@stats_null +PREHOOK: Output: default@stats_null #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null +POSTHOOK: Output: default@stats_null #### A masked pattern was here #### PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d PREHOOK: type: QUERY @@ -364,12 +366,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part PREHOOK: Input: default@stats_null_part@dt=1 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@stats_null_part +PREHOOK: Output: default@stats_null_part@dt=1 +PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null_part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part POSTHOOK: Input: default@stats_null_part@dt=1 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@stats_null_part +POSTHOOK: Output: default@stats_null_part@dt=1 +POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out index c779741..f6d5181 100644 --- a/ql/src/test/results/clientpositive/stats_partial_size.q.out +++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -28,10 +28,12 @@ POSTHOOK: Output: default@sample PREHOOK: query: analyze table sample compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@sample +PREHOOK: Output: default@sample #### A masked pattern was here #### POSTHOOK: query: analyze table sample compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@sample +POSTHOOK: Output: default@sample #### A masked pattern was here #### PREHOOK: query: explain select sample_partitioned.x from sample_partitioned, sample where sample.y = sample_partitioned.y PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/stats_ppr_all.q.out b/ql/src/test/results/clientpositive/stats_ppr_all.q.out index d4060c6..c52f582 100644 --- a/ql/src/test/results/clientpositive/stats_ppr_all.q.out +++ b/ql/src/test/results/clientpositive/stats_ppr_all.q.out @@ -46,6 +46,10 @@ PREHOOK: Input: default@ss PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=1 PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=2 PREHOOK: Input: default@ss@country=US/year=2015/month=2/day=1 +PREHOOK: Output: default@ss +PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=1 +PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=2 +PREHOOK: Output: default@ss@country=US/year=2015/month=2/day=1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE ss 
PARTITION(country,year,month,day) compute statistics for columns POSTHOOK: type: QUERY @@ -53,6 +57,10 @@ POSTHOOK: Input: default@ss POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=1 POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=2 POSTHOOK: Input: default@ss@country=US/year=2015/month=2/day=1 +POSTHOOK: Output: default@ss +POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=1 +POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=2 +POSTHOOK: Output: default@ss@country=US/year=2015/month=2/day=1 #### A masked pattern was here #### PREHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index c7ee93d..02d49b7 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -66,7 +66,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -109,6 +110,9 @@ STAGE PLANS: Column Types: string, int, float Table: default.uservisits_web_text_none + Stage: Stage-2 + Stats-Aggr Operator + PREHOOK: query: explain extended analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY @@ -117,7 +121,8 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -126,7 +131,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) outputColumnNames: sourceip, avgtimeonsite, adrevenue @@ -228,13 +234,19 @@ STAGE PLANS: Table: default.uservisits_web_text_none Is Table Level Stats: true + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ + PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE @@ -300,7 +312,8 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-0, Stage-2 + Stage-2 depends on 
 stages: Stage-0
 STAGE PLANS:
   Stage: Stage-0
@@ -343,13 +356,18 @@ STAGE PLANS:
           Column Types: int, double, string, boolean, binary
           Table: default.empty_tab
 
+  Stage: Stage-2
+    Stats-Aggr Operator
+
 PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
 PREHOOK: type: QUERY
 PREHOOK: Input: default@empty_tab
+PREHOOK: Output: default@empty_tab
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@empty_tab
+POSTHOOK: Output: default@empty_tab
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted empty_tab a
 PREHOOK: type: DESCTABLE
@@ -469,10 +487,12 @@ PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for co
 PREHOOK: type: QUERY
 PREHOOK: Input: test@uservisits_web_text_none
 #### A masked pattern was here ####
+PREHOOK: Output: test@uservisits_web_text_none
 POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword
 POSTHOOK: type: QUERY
 POSTHOOK: Input: test@uservisits_web_text_none
 #### A masked pattern was here ####
+POSTHOOK: Output: test@uservisits_web_text_none
 PREHOOK: query: desc extended UserVisits_web_text_none sKeyword
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: test@uservisits_web_text_none
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index dd8849d..e539235 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -234,10 +234,12 @@ Stage-2
 PREHOOK: query: analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_stats
+PREHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_stats compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_stats
+POSTHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
@@ -259,6 +261,9 @@ Stage-2
         Output:["key","value"]
         TableScan [TS_0] (rows=500/500 width=10)
           default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+Stage-3
+  Stats-Aggr Operator
+    Please refer to the previous Stage-0
 
 PREHOOK: query: CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))
 PREHOOK: type: CREATEMACRO
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index ee9affb..d5fa74b 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -36,10 +36,12 @@ Stage-2
 PREHOOK: query: analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_stats
+PREHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_stats compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_stats
+POSTHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
@@ -61,6 +63,9 @@ Stage-2
         Output:["key","value"]
         TableScan [TS_0] (rows=500/500 width=10)
           default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+Stage-3
+  Stats-Aggr Operator
+    Please refer to the previous Stage-0
 
 PREHOOK: query: drop table src_multi2
 PREHOOK: type: DROPTABLE
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index ef71d73..8137047 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -19,10 +19,12 @@ POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchem
 PREHOOK: query: analyze table acid_vectorized compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_vectorized
+PREHOOK: Output: default@acid_vectorized
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table acid_vectorized compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_vectorized
+POSTHOOK: Output: default@acid_vectorized
 #### A masked pattern was here ####
 PREHOOK: query: explain select a, b from acid_vectorized order by a, b
 PREHOOK: type: QUERY
@@ -39,13 +41,13 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_8]
-        Select Operator [SEL_7] (rows=16 width=101)
+        Select Operator [SEL_7] (rows=10 width=101)
           Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] vectorized
           SHUFFLE [RS_6]
-            Select Operator [SEL_5] (rows=16 width=101)
+            Select Operator [SEL_5] (rows=10 width=101)
               Output:["_col0","_col1"]
-              TableScan [TS_0] (rows=16 width=101)
+              TableScan [TS_0] (rows=10 width=101)
                 default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
 
 PREHOOK: query: explain select key, value
@@ -215,6 +217,9 @@ Stage-2
         Output:["key","value"]
         TableScan [TS_0] (rows=500 width=178)
           default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+Stage-3
+  Stats-Aggr Operator
+    Please refer to the previous Stage-0
 
 PREHOOK: query: explain CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))
diff --git a/ql/src/test/results/clientpositive/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/vector_outer_join1.q.out
index 3df63dc..01281ab 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join1.q.out
@@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/vector_outer_join2.q.out
index ec7de23..7fd60de 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join2.q.out
@@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
index 1d2abee..4df1f61 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a
+PREHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a
+POSTHOOK: Output: default@small_alltypesorc_a
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
index a73a5e1..8454bc6 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
@@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_b
+PREHOOK: Output: default@small_alltypesorc_b
 #### A masked pattern was here ####
 POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_b
+POSTHOOK: Output: default@small_alltypesorc_b
 #### A masked pattern was here ####
 PREHOOK: query: select * from small_alltypesorc_b
 PREHOOK: type: QUERY