diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b01ebd8..d4ea20a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -474,6 +474,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   cbo_rp_unionDistinct_2.q,\
   cbo_rp_windowing_2.q,\
   cbo_subq_not_in.q,\
+  column_table_stats.q,\
   constprog_dpp.q,\
   current_date_timestamp.q,\
   correlationoptimizer1.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 3e749eb..36009bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -1007,6 +1007,23 @@ public TableSpec(Hive db, HiveConf conf, String tableName, Map<String, String> partSpec)
       }
     }
 
+    public TableSpec(Table tableHandle, List<Partition> partitions)
+        throws HiveException {
+      this.tableHandle = tableHandle;
+      this.tableName = tableHandle.getTableName();
+      if (partitions != null && !partitions.isEmpty()) {
+        this.specType = SpecType.STATIC_PARTITION;
+        this.partitions = partitions;
+        List<FieldSchema> partCols = this.tableHandle.getPartCols();
+        this.partSpec = new LinkedHashMap<>();
+        for (FieldSchema partCol : partCols) {
+          partSpec.put(partCol.getName(), null);
+        }
+      } else {
+        this.specType = SpecType.TABLE_ONLY;
+      }
+    }
+
     public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
         boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
@@ -1156,7 +1173,6 @@ public String toString() {
     private List<String> colType;
     private boolean tblLvl;
 
-
     public String getTableName() {
       return tableName;
     }
@@ -1188,6 +1204,7 @@ public void setTblLvl(boolean isTblLvl) {
     public void setColType(List<String> colType) {
       this.colType = colType;
     }
+
   }
 
   /**
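Note: the TableSpec(Table, List<Partition>) constructor added above derives the spec type
from the partition list alone. A minimal sketch of the intended call pattern follows; the
metastore lookups and the "default"/"spart" names are illustrative assumptions, not code
from this patch:

    // Sketch only. Hive, Table, Partition and TableSpec are the ql classes
    // referenced in the hunk above; conf is a HiveConf.
    Hive db = Hive.get(conf);
    Table t = db.getTable("default", "spart");
    List<Partition> parts = new ArrayList<>(db.getAllPartitionsOf(t));
    // A non-empty list yields SpecType.STATIC_PARTITION, with partSpec mapping
    // each partition column name to null; an empty or null list yields TABLE_ONLY.
    TableSpec spec = new TableSpec(t, parts);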
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 7f5fdff..905431f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -199,6 +199,10 @@ protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
     // All the setup is done in GenMapRedUtils
     GenMapRedUtils.setMapWork(mapWork, context.parseContext, context.inputs, partitions, root, alias, context.conf, false);
+    // We also collect table stats while collecting column stats.
+    if (context.parseContext.getAnalyzeRewrite() != null) {
+      mapWork.setGatheringStats(true);
+    }
   }
 
   // removes any union operator and clones the plan
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
index c13a404..1dafde1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 import java.util.Stack;
@@ -30,14 +32,17 @@
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;
 import org.apache.hadoop.hive.ql.plan.StatsWork;
@@ -65,9 +70,8 @@ public ProcessAnalyzeTable(GenTezUtils utils) {
 
   @SuppressWarnings("unchecked")
   @Override
-  public Object process(Node nd, Stack<Node> stack,
-      NodeProcessorCtx procContext, Object... nodeOutputs)
-      throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
+      Object... nodeOutputs) throws SemanticException {
 
     GenTezProcContext context = (GenTezProcContext) procContext;
 
@@ -79,18 +83,16 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
 
     if (parseContext.getQueryProperties().isAnalyzeCommand()) {
 
-      assert tableScan.getChildOperators() == null
-          || tableScan.getChildOperators().size() == 0;
+      assert tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0;
 
       String alias = null;
-      for (String a: parseContext.getTopOps().keySet()) {
+      for (String a : parseContext.getTopOps().keySet()) {
         if (tableScan == parseContext.getTopOps().get(a)) {
           alias = a;
         }
       }
 
       assert alias != null;
-
       TezWork tezWork = context.currentTask.getWork();
 
       if (inputFormat.equals(OrcInputFormat.class)) {
         // For ORC, all the following statements are the same
@@ -99,7 +101,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
 
         // There will not be any Tez job above this task
-        StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
+        StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata()
+            .getTableSpec());
         snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
             HiveConf.ConfVars.HIVE_STATS_RELIABLE));
         // If partition is specified, get pruned partition list
@@ -107,8 +110,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
         if (confirmedParts.size() > 0) {
           Table source = tableScan.getConf().getTableMetadata();
           List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
-          PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts,
-              partCols, false);
+          PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols,
+              false);
           snjWork.setPrunedPartitionList(partList);
         }
         Task<?> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
@@ -118,52 +121,106 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
         return true;
       } else {
-        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
-        // The plan consists of a simple TezTask followed by a StatsTask.
-        // The Tez task is just a simple TableScanOperator
 
-        StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
-        statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
-        statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
-        statsWork.setSourceTask(context.currentTask);
-        statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
-        Task<?> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
-        context.currentTask.addDependentTask(statsTask);
-
-        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
-        // The plan consists of a StatsTask only.
-        if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
-          statsTask.setParentTasks(null);
-          statsWork.setNoScanAnalyzeCommand(true);
-          context.rootTasks.remove(context.currentTask);
-          context.rootTasks.add(statsTask);
-        }
+        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
+        // The plan consists of a simple TezTask followed by a StatsTask.
+        // The Tez task is just a simple TableScanOperator
 
+        StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
+        statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+        statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
+        statsWork.setSourceTask(context.currentTask);
+        statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
+            HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+        Task<?> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
+        context.currentTask.addDependentTask(statsTask);
 
-        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
-        if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) {
-          handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask);
-        }
+        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+        // The plan consists of a StatsTask only.
+        if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
+          statsTask.setParentTasks(null);
+          statsWork.setNoScanAnalyzeCommand(true);
+          context.rootTasks.remove(context.currentTask);
+          context.rootTasks.add(statsTask);
+        }
 
-        // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
-        // and pass it to setTaskPlan as the last parameter
-        Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
-        PrunedPartitionList partitions = null;
-        if (confirmedPartns.size() > 0) {
-          Table source = tableScan.getConf().getTableMetadata();
-          List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
-          partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false);
-        }
+        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
+        if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) {
+          handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask);
+        }
+
+        // NOTE: here we should use the new partition predicate pushdown API to
+        // get the pruned list of partitions,
+        // and pass it to setTaskPlan as the last parameter
+        Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
+        PrunedPartitionList partitions = null;
+        if (confirmedPartns.size() > 0) {
+          Table source = tableScan.getConf().getTableMetadata();
+          List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
+          partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false);
+        }
 
-        MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
-        w.setGatheringStats(true);
+        MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
+        w.setGatheringStats(true);
 
-        return true;
+        return true;
+      }
+    } else if (parseContext.getAnalyzeRewrite() != null) {
+      // We need to collect table stats while collecting column stats.
+      try {
+        context.currentTask.addDependentTask(genTableStats(context, tableScan));
+      } catch (HiveException e) {
+        throw new SemanticException(e);
+      }
     }
 
     return null;
   }
 
+  private Task<? extends Serializable> genTableStats(GenTezProcContext context, TableScanOperator tableScan)
+      throws HiveException {
+    Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata()
+        .getInputFormatClass();
+    ParseContext parseContext = context.parseContext;
+    Table table = tableScan.getConf().getTableMetadata();
+    List<Partition> partitions = new ArrayList<>();
+    if (table.isPartitioned()) {
+      partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
+      for (Partition partn : partitions) {
+        // inputs.add(new ReadEntity(partn)); // is this needed at all?
+        LOG.debug("Adding partition " + partn + " to the table stats outputs");
+        context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
+      }
+    }
+    TableSpec tableSpec = new TableSpec(table, partitions);
+    tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
+
+    if (inputFormat.equals(OrcInputFormat.class)) {
+      // For ORC, all the following statements are the same
+      StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata()
+          .getTableSpec());
+      snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      // If partition is specified, get pruned partition list
+      if (partitions.size() > 0) {
+        snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
+      }
+      return TaskFactory.get(snjWork, parseContext.getConf());
+    } else {
+
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
+      // The plan consists of a simple TezTask followed by a StatsTask.
+      // The Tez task is just a simple TableScanOperator
+
+      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
+      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+      statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
+      statsWork.setSourceTask(context.currentTask);
+      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      return TaskFactory.get(statsWork, parseContext.getConf());
+    }
+  }
+
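Note: genTableStats reuses the dispatch the ANALYZE path above already follows. For ORC,
table stats can be read from file footers, so a metadata-only StatsNoJobWork task suffices;
for other formats a StatsWork task aggregates what the stats-gathering TableScan publishes,
so it must run after the Tez vertex (this is the Stats-Aggr stage visible in the explain
output further below). A condensed restatement of that branch, as a sketch rather than
additional patch code:

    // tableSpec, inputFormat, context and parseContext as in genTableStats above.
    if (inputFormat.equals(OrcInputFormat.class)) {
      // Footer-based stats: no job needs to run before this task.
      StatsNoJobWork snjWork = new StatsNoJobWork(tableSpec);
      return TaskFactory.get(snjWork, parseContext.getConf());
    } else {
      // Aggregate the stats published by the stats-gathering table scan.
      StatsWork statsWork = new StatsWork(tableSpec);
      statsWork.setSourceTask(context.currentTask); // runs after the Tez task
      return TaskFactory.get(statsWork, parseContext.getConf());
    }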
   /**
    * handle partial scan command.
    *
@@ -171,11 +228,12 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
    */
   private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext,
       StatsWork statsWork, GenTezProcContext context, Task<?> statsTask)
-          throws SemanticException {
+      throws SemanticException {
     String aggregationKey = tableScan.getConf().getStatsAggPrefix();
     StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
-    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
+    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan,
+        aggregationKeyBuffer);
     aggregationKey = aggregationKeyBuffer.toString();
 
     // scan work
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 0872e53..e4feeea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10256,7 +10256,11 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias,
       RowResolver rwsch)
       throws SemanticException {
-    if (!qbp.isAnalyzeCommand()) {
+    // Gather stats only for an ANALYZE command or a column stats rewrite;
+    // for a column stats rewrite, only when the execution engine is Tez.
+    if ((!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null)
+        || (qbp.getAnalyzeRewrite() != null && !HiveConf.getVar(conf,
+            HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) {
       tsDesc.setGatherStats(false);
     } else {
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
@@ -10279,15 +10283,6 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias,
     tsDesc.addVirtualCols(vcList);
 
     String tblName = tab.getTableName();
-    TableSpec tblSpec = qbp.getTableSpec(alias);
-    Map<String, String> partSpec = tblSpec.getPartSpec();
-
-    if (partSpec != null) {
-      List<String> cols = new ArrayList<String>();
-      cols.addAll(partSpec.keySet());
-      tsDesc.setPartColumns(cols);
-    }
-
     // Theoretically the key prefix could be any unique string shared
     // between TableScanOperator (when publishing) and StatsTask (when aggregating).
     // Here we use
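Note: the guard added in the first SemanticAnalyzer hunk is easier to read in positive
form. The following equivalent sketch covers only the boolean condition (the surrounding
control flow is unchanged); the local variable names are hypothetical:

    boolean analyze = qbp.isAnalyzeCommand();
    boolean columnStats = qbp.getAnalyzeRewrite() != null;
    boolean tez = "tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
    // Gather stats for ANALYZE commands, and for column-stats rewrites only on Tez.
    boolean gather = (analyze || columnStats) && (!columnStats || tez);
    tsDesc.setGatherStats(gather);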
@@ -10296,13 +10291,27 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias,
     // Currently, partition spec can only be static partition.
     String k = MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
     tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k);
-
+
     // set up WriteEntity for replication
     outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
 
     // add WriteEntity for each matching partition
     if (tab.isPartitioned()) {
-      if (partSpec == null) {
+      List<String> cols = new ArrayList<String>();
+      if (qbp.getAnalyzeRewrite() != null) {
+        List<FieldSchema> partitionCols = tab.getPartCols();
+        for (FieldSchema fs : partitionCols) {
+          cols.add(fs.getName());
+        }
+        tsDesc.setPartColumns(cols);
+        return;
+      }
+      TableSpec tblSpec = qbp.getTableSpec(alias);
+      Map<String, String> partSpec = tblSpec.getPartSpec();
+      if (partSpec != null) {
+        cols.addAll(partSpec.keySet());
+        tsDesc.setPartColumns(cols);
+      } else {
         throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
       }
       List<Partition> partitions = qbp.getTableSpec().partitions;
diff --git a/ql/src/test/queries/clientpositive/column_table_stats.q b/ql/src/test/queries/clientpositive/column_table_stats.q
new file mode 100644
index 0000000..991fa54
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats.q
@@ -0,0 +1,88 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart partition(ds,hr) compute statistics for columns;
+
+analyze table spart partition(ds,hr) compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart partition(hr="11") compute statistics for columns;
+
+analyze table spart partition(hr="11") compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
diff --git a/ql/src/test/queries/clientpositive/column_table_stats_orc.q b/ql/src/test/queries/clientpositive/column_table_stats_orc.q
new file mode 100644
index 0000000..51fccd2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats_orc.q
@@ -0,0 +1,57 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS ORC;
+
+insert into table s values ('1','2');
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC;
+
+insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2');
+insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2');
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC;
+
+insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2');
+insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2');
+
+desc formatted spart;
+
+explain extended analyze table spart partition(hr="11") compute statistics for columns;
+
+analyze table spart partition(hr="11") compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out
new file mode 100644
index 0000000..a920e0c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/column_table_stats.q.out
@@ -0,0 +1,1395 @@
+PREHOOK: query: DROP TABLE IF EXISTS s
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS s
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s
+POSTHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@s
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@s
+PREHOOK: query: desc formatted s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@s
+POSTHOOK: query: desc formatted s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@s
+# col_name data_type comment
+
+key string default
+value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numRows 0 + rawDataSize 0 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table s compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table s compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Statistics Aggregation Key Prefix: default.s/ + GatherStats: true + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: s + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.s + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct s { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.s + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct s { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.s + name: default.s + Truncated Path -> Alias: + /s [s] + Reducer 2 + 
Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.s/ + + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.s + Is Table Level Stats: true + +PREHOOK: query: analyze table s compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@s +PREHOOK: Output: default@s +#### A masked pattern was here #### +POSTHOOK: query: analyze table s compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s +POSTHOOK: Output: default@s +#### A masked pattern was here #### +PREHOOK: query: desc formatted s +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@s +POSTHOOK: query: desc formatted s +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@s +# col_name data_type comment + +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DROP TABLE IF EXISTS spart +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS spart +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@spart +POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spart +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: 
query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table spart compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table spart compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: spart + Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL + Statistics Aggregation Key Prefix: default.spart/ + GatherStats: true + Select Operator + expressions: ds (type: string), hr (type: string), key (type: string), value (type: string) + outputColumnNames: ds, hr, key, value + Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart + Truncated Path -> Alias: + /spart/ds=2008-04-08/hr=11 [spart] + /spart/ds=2008-04-08/hr=12 [spart] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.spart/ + + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.spart + Is Table Level Stats: false + +PREHOOK: query: analyze table spart compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@spart +PREHOOK: Input: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@spart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@spart +PREHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table spart compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spart +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DROP TABLE IF EXISTS spart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@spart +PREHOOK: Output: default@spart +POSTHOOK: query: DROP TABLE IF EXISTS spart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@spart +POSTHOOK: Output: default@spart +PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@spart +POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default') +PARTITIONED BY (ds STRING, hr STRING) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spart +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@spart +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" +OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12") +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted 
spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended analyze table spart partition(ds,hr) compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain extended analyze table spart partition(ds,hr) compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-0 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: spart + Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL + Statistics Aggregation Key Prefix: default.spart/ + GatherStats: true + Select Operator + expressions: ds (type: string), hr (type: string), key (type: string), value (type: string) + outputColumnNames: ds, hr, key, value + Statistics: Num rows: 58 Data size: 32968 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2704 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + numFiles 1 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.spart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct spart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.spart + name: default.spart + Truncated Path -> Alias: + /spart/ds=2008-04-08/hr=11 [spart] + /spart/ds=2008-04-08/hr=12 [spart] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 2656 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: default.spart/ + + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.spart + Is Table Level Stats: false + +PREHOOK: query: analyze table spart partition(ds,hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@spart +PREHOOK: Input: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@spart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@spart +PREHOOK: Output: default@spart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table spart partition(ds,hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spart +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@spart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@spart +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +PREHOOK: query: desc formatted spart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default +value string default + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: spart +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@spart +POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@spart +# col_name data_type comment + +key string default 
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: DROP TABLE IF EXISTS spart
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@spart
+PREHOOK: Output: default@spart
+POSTHOOK: query: DROP TABLE IF EXISTS spart
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@spart
+POSTHOOK: Output: default@spart
+PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@spart
+POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@spart
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11")
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@spart
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11")
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12")
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@spart
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12")
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain extended analyze table spart partition(hr="11") compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended analyze table spart partition(hr="11") compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-0
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: spart
+                  Statistics: Num rows: 29 Data size: 11148 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics Aggregation Key Prefix: default.spart/
+                  GatherStats: true
+                  Select Operator
+                    expressions: ds (type: string), key (type: string), value (type: string)
+                    outputColumnNames: ds, key, value
+                    Statistics: Num rows: 29 Data size: 11148 Basic stats: COMPLETE Column stats: PARTIAL
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), '11' (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), '11' (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), '11' (type: string)
+                        Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: true
+            Execution mode: llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.spart
+                    numFiles 1
+                    numRows 0
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 0
+                    serialization.ddl struct spart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.spart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct spart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.spart
+                  name: default.spart
+            Truncated Path -> Alias:
+              /spart/ds=2008-04-08/hr=11 [spart]
+        Reducer 2
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), '11' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+      Stats Aggregation Key Prefix: default.spart/
+
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.spart
+          Is Table Level Stats: false
+
+PREHOOK: query: analyze table spart partition(hr="11") compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spart
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Output: default@spart
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table spart partition(hr="11") compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spart
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	numFiles	1
+	numRows	0
+	rawDataSize	0
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
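The pair of `desc formatted` partition outputs above is the point of this golden file: the partition that was analyzed for column stats (hr=11) now also carries accurate basic stats (numRows 500, rawDataSize 5312), while the untouched partition (hr=12) still shows numRows 0. A minimal HiveQL sketch of the scenario, assuming a kv1.txt-style two-column input file (names mirror the test; this block is illustrative commentary, not generated output):

    CREATE TABLE spart (key STRING, value STRING)
      PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE;
    LOAD DATA LOCAL INPATH '../../data/files/kv1.txt'
      OVERWRITE INTO TABLE spart PARTITION (ds='2008-04-08', hr='11');
    LOAD DATA LOCAL INPATH '../../data/files/kv1.txt'
      OVERWRITE INTO TABLE spart PARTITION (ds='2008-04-08', hr='12');
    -- With this change, the column-stats scan doubles as a basic-stats
    -- gather for the partitions it touches:
    ANALYZE TABLE spart PARTITION (hr='11') COMPUTE STATISTICS FOR COLUMNS;
    DESC FORMATTED spart PARTITION (ds='2008-04-08', hr='11'); -- numRows 500
    DESC FORMATTED spart PARTITION (ds='2008-04-08', hr='12'); -- numRows still 0

The ORC variant of the same test follows.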
diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out
new file mode 100644
index 0000000..f038350
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out
@@ -0,0 +1,969 @@
+PREHOOK: query: DROP TABLE IF EXISTS s
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS s
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s
+POSTHOOK: query: CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s
+PREHOOK: query: insert into table s values ('1','2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@s
+POSTHOOK: query: insert into table s values ('1','2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@s
+POSTHOOK: Lineage: s.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: s.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@s
+POSTHOOK: query: desc formatted s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@s
+# col_name data_type comment
+
+key string default
+value string default
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles	1
+	numRows	1
+	rawDataSize	170
+	totalSize	273
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain extended analyze table s compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended analyze table s compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-0
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics Aggregation Key Prefix: default.s/
+                  GatherStats: true
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order:
+                        sort order:
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+                        auto parallelism: false
+            Execution mode: llap
+            LLAP IO: all inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: s
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.s
+                    numFiles 1
+                    numRows 1
+                    rawDataSize 170
+                    serialization.ddl struct s { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    totalSize 273
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.s
+                      numFiles 1
+                      numRows 1
+                      rawDataSize 170
+                      serialization.ddl struct s { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      totalSize 273
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.s
+                  name: default.s
+            Truncated Path -> Alias:
+              /s [s]
+        Reducer 2
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.s
+          Is Table Level Stats: true
+
+PREHOOK: query: analyze table s compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s
+PREHOOK: Output: default@s
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table s compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s
+POSTHOOK: Output: default@s
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted s
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@s
+POSTHOOK: query: desc formatted s
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@s
+# col_name data_type comment
+
+key string default
+value string default
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	1
+	rawDataSize	170
+	totalSize	273
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: DROP TABLE IF EXISTS spart
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS spart
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@spart
+POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@spart
+PREHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=12
+POSTHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain extended analyze table spart compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended analyze table spart compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-0
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: spart
+                  Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics Aggregation Key Prefix: default.spart/
+                  GatherStats: true
+                  Select Operator
+                    expressions: ds (type: string), hr (type: string), key (type: string), value (type: string)
+                    outputColumnNames: ds, hr, key, value
+                    Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: PARTIAL
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: true
+            Execution mode: llap
+            LLAP IO: all inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    bucket_count -1
+#### A masked pattern was here ####
+                    name default.spart
+                    numFiles 1
+                    numRows 1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 170
+                    serialization.ddl struct spart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    totalSize 273
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.spart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct spart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.spart
+                  name: default.spart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=12
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    bucket_count -1
+#### A masked pattern was here ####
+                    name default.spart
+                    numFiles 1
+                    numRows 1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 170
+                    serialization.ddl struct spart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    totalSize 273
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.spart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct spart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.spart
+                  name: default.spart
+            Truncated Path -> Alias:
+              /spart/ds=2008-04-08/hr=11 [spart]
+              /spart/ds=2008-04-08/hr=12 [spart]
+        Reducer 2
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.spart
+          Is Table Level Stats: false
+
+PREHOOK: query: analyze table spart compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spart
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@spart
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table spart compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spart
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	1
+	rawDataSize	170
+	totalSize	273
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	1
+	rawDataSize	170
+	totalSize	273
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: DROP TABLE IF EXISTS spart
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@spart
+PREHOOK: Output: default@spart
+POSTHOOK: query: DROP TABLE IF EXISTS spart
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@spart
+POSTHOOK: Output: default@spart
+PREHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@spart
+POSTHOOK: query: CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@spart
+PREHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=12
+POSTHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=12
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: query: insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: spart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain extended analyze table spart partition(hr="11") compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended analyze table spart partition(hr="11") compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-0
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: spart
+                  Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics Aggregation Key Prefix: default.spart/
+                  GatherStats: true
+                  Select Operator
+                    expressions: ds (type: string), key (type: string), value (type: string)
+                    outputColumnNames: ds, key, value
+                    Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), '11' (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), '11' (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), '11' (type: string)
+                        Statistics: Num rows: 1 Data size: 1254 Basic stats: COMPLETE Column stats: PARTIAL
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: true
+            Execution mode: llap
+            LLAP IO: all inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    bucket_count -1
+#### A masked pattern was here ####
+                    name default.spart
+                    numFiles 1
+                    numRows 1
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 170
+                    serialization.ddl struct spart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    totalSize 273
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.spart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct spart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.spart
+                  name: default.spart
+            Truncated Path -> Alias:
+              /spart/ds=2008-04-08/hr=11 [spart]
+        Reducer 2
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), '11' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), '11' (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 1230 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.spart
+          Is Table Level Stats: false
+
+PREHOOK: query: analyze table spart partition(hr="11") compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spart
+PREHOOK: Input: default@spart@ds=2008-04-08/hr=11
+PREHOOK: Output: default@spart
+PREHOOK: Output: default@spart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table spart partition(hr="11") compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spart
+POSTHOOK: Input: default@spart@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@spart
+POSTHOOK: Output: default@spart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted spart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=11)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	numFiles	1
+	numRows	1
+	rawDataSize	170
+	totalSize	273
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@spart
+POSTHOOK: query: desc formatted spart PARTITION(ds='2008-04-08', hr=12)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@spart
+# col_name data_type comment
+
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 12]
+Database: default
+Table: spart
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles	1
+	numRows	1
+	rawDataSize	170
+	totalSize	273
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
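Both golden files above pin down the same three-stage shape for ANALYZE ... COMPUTE STATISTICS FOR COLUMNS: the Tez vertex pair computes the column stats, the new Stats-Aggr stage publishes the basic stats gathered during that same scan, and Column Stats Work persists the column stats. A compressed view of what the EXPLAIN output encodes (illustrative commentary only, not generated output):

    EXPLAIN EXTENDED ANALYZE TABLE spart COMPUTE STATISTICS FOR COLUMNS;
    -- Stage-0: Tez - Map (compute_stats, mode: hash) -> Reducer (mode: mergepartial)
    -- Stage-2: Stats-Aggr Operator   (basic table/partition stats; new with this patch)
    -- Stage-3: Column Stats Work     (column stats written to the metastore)

The remaining hunks update existing golden files for the same plan change.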
diff --git a/ql/src/test/results/clientpositive/perf/query14.q.out b/ql/src/test/results/clientpositive/perf/query14.q.out
index 9821180..051d837 100644
--- a/ql/src/test/results/clientpositive/perf/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/query14.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[916][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 114' is a cross product
-Warning: Shuffle Join MERGEJOIN[917][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 115' is a cross product
 Warning: Shuffle Join MERGEJOIN[914][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 61' is a cross product
 Warning: Shuffle Join MERGEJOIN[915][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 62' is a cross product
+Warning: Shuffle Join MERGEJOIN[916][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 114' is a cross product
+Warning: Shuffle Join MERGEJOIN[917][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 115' is a cross product
 Warning: Shuffle Join MERGEJOIN[912][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
 Warning: Shuffle Join MERGEJOIN[913][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index 20c330a..32609eb 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -234,10 +234,12 @@ Stage-2
 PREHOOK: query: analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_stats
+PREHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_stats compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_stats
+POSTHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
@@ -246,19 +248,21 @@ POSTHOOK: type: QUERY
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 
-Stage-2
+Stage-3
   Column Stats Work{}
-  Stage-0
-    Reducer 2
-    File Output Operator [FS_5]
-      Group By Operator [GBY_3] (rows=1/1 width=960)
-        Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"]
-      <-Map 1 [CUSTOM_SIMPLE_EDGE]
-        PARTITION_ONLY_SHUFFLE [RS_2]
-          Select Operator [SEL_1] (rows=500/500 width=10)
-            Output:["key","value"]
-            TableScan [TS_0] (rows=500/500 width=10)
-              default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+  Stage-2
+    Stats-Aggr Operator
+    Stage-0
+      Reducer 2
+      File Output Operator [FS_5]
+        Group By Operator [GBY_3] (rows=1/1 width=960)
+          Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"]
+        <-Map 1 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_2]
+            Select Operator [SEL_1] (rows=500/500 width=10)
+              Output:["key","value"]
+              TableScan [TS_0] (rows=500/500 width=10)
+                default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
 
 PREHOOK: query: CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))
 PREHOOK: type: CREATEMACRO
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index ee9affb..b35e294 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -36,10 +36,12 @@ Stage-2
 PREHOOK: query: analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_stats
+PREHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table src_stats compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_stats
+POSTHOOK: Output: default@src_stats
 #### A masked pattern was here ####
 PREHOOK: query: explain analyze analyze table src_stats compute statistics for columns
 PREHOOK: type: QUERY
@@ -48,19 +50,21 @@ POSTHOOK: type: QUERY
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 
-Stage-2
+Stage-3
   Column Stats Work{}
-  Stage-0
-    Reducer 2
-    File Output Operator [FS_5]
-      Group By Operator [GBY_3] (rows=1/1 width=960)
-        Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"]
-      <-Map 1 [CUSTOM_SIMPLE_EDGE]
-        PARTITION_ONLY_SHUFFLE [RS_2]
-          Select Operator [SEL_1] (rows=500/500 width=10)
-            Output:["key","value"]
-            TableScan [TS_0] (rows=500/500 width=10)
-              default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+  Stage-2
+    Stats-Aggr Operator
+    Stage-0
+      Reducer 2
+      File Output Operator [FS_5]
+        Group By Operator [GBY_3] (rows=1/1 width=960)
+          Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"]
+        <-Map 1 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_2]
+            Select Operator [SEL_1] (rows=500/500 width=10)
+              Output:["key","value"]
+              TableScan [TS_0] (rows=500/500 width=10)
+                default@src_stats,src_stats,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
 
 PREHOOK: query: drop table src_multi2
 PREHOOK: type: DROPTABLE
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index 74e4693..da52b0a 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -19,10 +19,12 @@ POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchem
 PREHOOK: query: analyze table acid_vectorized compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_vectorized
+PREHOOK: Output: default@acid_vectorized
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table acid_vectorized compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_vectorized
+POSTHOOK: Output: default@acid_vectorized
 #### A masked pattern was here ####
 PREHOOK: query: explain select a, b from acid_vectorized order by a, b
 PREHOOK: type: QUERY
@@ -39,13 +41,13 @@ Stage-0
   Stage-1
     Reducer 2 vectorized
       File Output Operator [FS_8]
-        Select Operator [SEL_7] (rows=16 width=101)
+        Select Operator [SEL_7] (rows=10 width=101)
           Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] vectorized
           SHUFFLE [RS_6]
-            Select Operator [SEL_5] (rows=16 width=101)
+            Select Operator [SEL_5] (rows=10 width=101)
               Output:["_col0","_col1"]
-              TableScan [TS_0] (rows=16 width=101)
+              TableScan [TS_0] (rows=10 width=101)
                 default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
 
 PREHOOK: query: explain select key, value
@@ -200,21 +202,23 @@ POSTHOOK: type: QUERY
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 
-Stage-2
+Stage-3
   Column Stats Work{}
-  Stage-0
-    Reducer 2
-    File Output Operator [FS_6]
-      Group By Operator [GBY_4] (rows=1 width=960)
-        Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
-      <-Map 1 [CUSTOM_SIMPLE_EDGE]
-        PARTITION_ONLY_SHUFFLE [RS_3]
-          Group By Operator [GBY_2] (rows=1 width=984)
-            Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"]
-            Select Operator [SEL_1] (rows=500 width=178)
-              Output:["key","value"]
-              TableScan [TS_0] (rows=500 width=178)
-                default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+  Stage-2
+    Stats-Aggr Operator
+    Stage-0
+      Reducer 2
+      File Output Operator [FS_6]
+        Group By Operator [GBY_4] (rows=1 width=960)
+          Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+        <-Map 1 [CUSTOM_SIMPLE_EDGE]
+          PARTITION_ONLY_SHUFFLE [RS_3]
+            Group By Operator [GBY_2] (rows=1 width=984)
+              Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"]
+              Select Operator [SEL_1] (rows=500 width=178)
+                Output:["key","value"]
+                TableScan [TS_0] (rows=500 width=178)
+                  default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))
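The remaining golden-file updates all follow from the same two effects. First, a column-stats ANALYZE now registers its target as a write entity, so the hooks record an Output alongside the Input. Second, because basic stats become accurate as a side effect of the scan, downstream plans pick up corrected row counts (the acid_vectorized scan drops from rows=16 to rows=10). A sketch of the hook delta, reusing the src_stats table from the tests (illustrative, not generated output):

    ANALYZE TABLE src_stats COMPUTE STATISTICS FOR COLUMNS;
    -- PREHOOK: Input:  default@src_stats
    -- PREHOOK: Output: default@src_stats   (newly recorded with this patch)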