diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 2ff76ee..d4fb163 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -106,6 +106,10 @@ public String getAggregator(Configuration conf) {
   // alterPartition/alterTable is happening via statsTask.
   public static final String STATS_GENERATED_VIA_STATS_TASK = "STATS_GENERATED_VIA_STATS_TASK";
 
+  // This string constant is used to indicate to AlterHandler that
+  // alterPartition/alterTable is happening via the user.
+  public static final String STATS_GENERATED_VIA_USER = "STATS_GENERATED_VIA_USER";
+
   // This string constant is used by AlterHandler to figure out that it should not attempt to
   // update stats. It is set by any client-side task which wishes to signal that no stats
   // update should take place, such as with replication.
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 432f7d0..64f5daf 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -231,8 +231,13 @@ public static boolean updateTableStatsFast(Table tbl,
     LOG.info("Updating table stats fast for " + tbl.getTableName());
     populateQuickStats(fileStatus, params);
     LOG.info("Updated size of table " + tbl.getTableName() +" to "+ params.get(StatsSetupConst.TOTAL_SIZE));
-    if(!params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK)) {
-      // invalidate stats requiring scan since this is a regular ddl alter case
+    if (params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_USER)) {
+      params.remove(StatsSetupConst.STATS_GENERATED_VIA_USER);
+      // although we accept the number from a user, we assume that it is inaccurate.
+      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.FALSE);
+    } else if (!params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK)) {
+      // invalidate stats requiring scan since this is a regular ddl alter
+      // case
       for (String stat : StatsSetupConst.statsRequireCompute) {
         params.put(stat, "-1");
       }
@@ -352,8 +357,14 @@ public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator part,
     FileStatus[] fileStatus = wh.getFileStatusesForLocation(part.getLocation());
     populateQuickStats(fileStatus, params);
     LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE));
-    if(!params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK)) {
-      // invalidate stats requiring scan since this is a regular ddl alter case
+    if (params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_USER)) {
+      params.remove(StatsSetupConst.STATS_GENERATED_VIA_USER);
+      // although we accept the number from a user, we assume that it is
+      // inaccurate.
+      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.FALSE);
+    } else if (!params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK)) {
+      // invalidate stats requiring scan since this is a regular ddl alter
+      // case
       for (String stat : StatsSetupConst.statsRequireCompute) {
         params.put(stat, "-1");
       }
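Reviewer sketch: the two MetaStoreUtils hunks above implement one three-way policy — user-supplied stats are accepted but flagged inaccurate, stats-task updates are trusted, and any other alter invalidates scan-based stats. A minimal standalone Java rendering of that policy (class and method names are illustrative, not part of the patch; the string literals mirror the StatsSetupConst values):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class QuickStatsPolicySketch {
      // Mirrors the branch added to updateTableStatsFast/updatePartitionStatsFast.
      static void applyQuickStatsPolicy(Map<String, String> params) {
        if (params.containsKey("STATS_GENERATED_VIA_USER")) {
          // Consume the one-shot marker so it is not persisted in the metastore.
          params.remove("STATS_GENERATED_VIA_USER");
          // Accept the user's numbers but treat them as inaccurate.
          params.put("COLUMN_STATS_ACCURATE", "false");
        } else if (!params.containsKey("STATS_GENERATED_VIA_STATS_TASK")) {
          // Regular DDL alter: force recomputation of stats that need a scan
          // (statsRequireCompute covers numRows and rawDataSize).
          for (String stat : new String[] {"numRows", "rawDataSize"}) {
            params.put(stat, "-1");
          }
        }
      }

      public static void main(String[] args) {
        Map<String, String> params = new LinkedHashMap<>();
        params.put("numRows", "1212");
        params.put("STATS_GENERATED_VIA_USER", "true");
        applyQuickStatsPolicy(params);
        // Prints {numRows=1212, COLUMN_STATS_ACCURATE=false}: the value is kept,
        // the marker is gone, and the stats are marked inaccurate.
        System.out.println(params);
      }
    }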
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index ac0ecd9..b28f71a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -3403,11 +3403,19 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part)
       }
       sd.setCols(alterTbl.getNewCols());
     } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
-      tbl.getTTable().getParameters().putAll(alterTbl.getProps());
+      if (part != null) {
+        part.getTPartition().getParameters().putAll(alterTbl.getProps());
+      } else {
+        tbl.getTTable().getParameters().putAll(alterTbl.getProps());
+      }
     } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) {
       Iterator<String> keyItr = alterTbl.getProps().keySet().iterator();
       while (keyItr.hasNext()) {
-        tbl.getTTable().getParameters().remove(keyItr.next());
+        if (part != null) {
+          part.getTPartition().getParameters().remove(keyItr.next());
+        } else {
+          tbl.getTTable().getParameters().remove(keyItr.next());
+        }
       }
     } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) {
       StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd());
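The DDLTask hunk routes ADDPROPS and DROPPROPS to the partition's parameter map when a partition is addressed and to the table's otherwise, which is what lets UPDATE STATISTICS work at partition granularity. A compact sketch of that dispatch, with plain maps standing in for the Thrift table/partition objects (chooseTarget is a hypothetical helper, not part of the patch):

    import java.util.HashMap;
    import java.util.Map;

    public class PropsTargetSketch {
      // A null partition map plays the role of part == null in DDLTask:
      // the statement addressed the table itself.
      static Map<String, String> chooseTarget(Map<String, String> tableParams,
          Map<String, String> partitionParams) {
        return partitionParams != null ? partitionParams : tableParams;
      }

      public static void main(String[] args) {
        Map<String, String> tableParams = new HashMap<>();
        Map<String, String> partParams = new HashMap<>();
        // ALTER TABLE t PARTITION (p=1) UPDATE STATISTICS SET ... lands on the partition:
        chooseTarget(tableParams, partParams).put("numRows", "1000020000");
        // ALTER TABLE t UPDATE STATISTICS SET ... lands on the table:
        chooseTarget(tableParams, null).put("numRows", "1212");
        System.out.println("table=" + tableParams + " partition=" + partParams);
      }
    }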
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 5e6b606..0585f4d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -98,6 +99,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc;
@@ -286,9 +288,11 @@ public void analyzeInternal(ASTNode input) throws SemanticException {
     } else if (ast.getType() == HiveParser.TOK_ALTERTABLE_PARTCOLTYPE) {
       analyzeAlterTablePartColType(qualified, ast);
     } else if (ast.getType() == HiveParser.TOK_ALTERTABLE_PROPERTIES) {
-      analyzeAlterTableProps(qualified, ast, false, false);
+      analyzeAlterTableProps(qualified, null, ast, false, false);
     } else if (ast.getType() == HiveParser.TOK_ALTERTABLE_DROPPROPERTIES) {
-      analyzeAlterTableProps(qualified, ast, false, true);
+      analyzeAlterTableProps(qualified, null, ast, false, true);
+    } else if (ast.getType() == HiveParser.TOK_ALTERTABLE_UPDATESTATS) {
+      analyzeAlterTableProps(qualified, partSpec, ast, false, false);
     } else if (ast.getType() == HiveParser.TOK_ALTERTABLE_SKEWED) {
       analyzeAltertableSkewedby(qualified, ast);
     } else if (ast.getType() == HiveParser.TOK_ALTERTABLE_EXCHANGEPARTITION) {
@@ -397,9 +401,9 @@ public void analyzeInternal(ASTNode input) throws SemanticException {
     String[] qualified = getQualifiedTableName((ASTNode) ast.getChild(0));
     ast = (ASTNode) ast.getChild(1);
     if (ast.getType() == HiveParser.TOK_ALTERVIEW_PROPERTIES) {
-      analyzeAlterTableProps(qualified, ast, true, false);
+      analyzeAlterTableProps(qualified, null, ast, true, false);
     } else if (ast.getType() == HiveParser.TOK_ALTERVIEW_DROPPROPERTIES) {
-      analyzeAlterTableProps(qualified, ast, true, true);
+      analyzeAlterTableProps(qualified, null, ast, true, true);
     } else if (ast.getType() == HiveParser.TOK_ALTERVIEW_ADDPARTS) {
       analyzeAlterTableAddParts(qualified, ast, true);
     } else if (ast.getType() == HiveParser.TOK_ALTERVIEW_DROPPARTS) {
@@ -1337,25 +1341,49 @@ private void validateAlterTableType(Table tbl, AlterTableTypes op, boolean expectView)
     }
   }
 
-  private void analyzeAlterTableProps(String[] qualified, ASTNode ast, boolean expectView, boolean isUnset)
-      throws SemanticException {
+  private void analyzeAlterTableProps(String[] qualified, HashMap<String, String> partSpec,
+      ASTNode ast, boolean expectView, boolean isUnset) throws SemanticException {
 
     String tableName = getDotName(qualified);
     HashMap<String, String> mapProp = getProps((ASTNode) (ast.getChild(0))
         .getChild(0));
+    if (SessionState.get().getCommandType()
+        .equals(HiveOperation.ALTERTABLE_UPDATETABLESTATS.getOperationName())
+        || SessionState.get().getCommandType()
+            .equals(HiveOperation.ALTERTABLE_UPDATEPARTSTATS.getOperationName())) {
+      // we need to check if the properties are valid, especially for stats.
+      boolean changeStatsSucceeded = false;
+      for (Entry<String, String> entry : mapProp.entrySet()) {
+        // we make sure that we do not change anything if there is anything
+        // wrong.
+        if (entry.getKey().equals(StatsSetupConst.ROW_COUNT)
+            || entry.getKey().equals(StatsSetupConst.RAW_DATA_SIZE)) {
+          try {
+            Long.parseLong(entry.getValue());
+            changeStatsSucceeded = true;
+          } catch (Exception e) {
+            throw new SemanticException("AlterTable " + entry.getKey() + " failed with value "
+                + entry.getValue());
+          }
+        }
+      }
+      if (changeStatsSucceeded) {
+        mapProp.put(StatsSetupConst.STATS_GENERATED_VIA_USER, StatsSetupConst.TRUE);
+      }
+    }
     AlterTableDesc alterTblDesc = null;
     if (isUnset == true) {
-      alterTblDesc = new AlterTableDesc(AlterTableTypes.DROPPROPS, expectView);
+      alterTblDesc = new AlterTableDesc(AlterTableTypes.DROPPROPS, partSpec, expectView);
       if (ast.getChild(1) != null) {
         alterTblDesc.setDropIfExists(true);
       }
     } else {
-      alterTblDesc = new AlterTableDesc(AlterTableTypes.ADDPROPS, expectView);
+      alterTblDesc = new AlterTableDesc(AlterTableTypes.ADDPROPS, partSpec, expectView);
     }
     alterTblDesc.setProps(mapProp);
     alterTblDesc.setOldName(tableName);
 
-    addInputsOutputsAlterTable(tableName, null, alterTblDesc);
+    addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
 
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc),
         conf));
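The analyzer change gates UPDATE STATISTICS commands: numRows and rawDataSize must parse as longs, the command is rejected before any metadata is touched otherwise, and only a successful check plants the STATS_GENERATED_VIA_USER marker that MetaStoreUtils later consumes. A self-contained sketch of the same check (the class name and IllegalArgumentException are stand-ins for the analyzer and SemanticException):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class UpdateStatsValidationSketch {
      static void validate(Map<String, String> props) {
        boolean changeStatsSucceeded = false;
        for (Map.Entry<String, String> e : props.entrySet()) {
          if (e.getKey().equals("numRows") || e.getKey().equals("rawDataSize")) {
            try {
              Long.parseLong(e.getValue());
              changeStatsSucceeded = true;
            } catch (NumberFormatException ex) {
              // Same message shape as the clientnegative golden file below.
              throw new IllegalArgumentException(
                  "AlterTable " + e.getKey() + " failed with value " + e.getValue());
            }
          }
        }
        if (changeStatsSucceeded) {
          props.put("STATS_GENERATED_VIA_USER", "true");
        }
      }

      public static void main(String[] args) {
        Map<String, String> ok = new LinkedHashMap<>();
        ok.put("numRows", "12");
        validate(ok); // passes and adds the one-shot marker

        Map<String, String> bad = new LinkedHashMap<>();
        bad.put("numRows", "NaN");
        validate(bad); // throws: AlterTable numRows failed with value NaN
      }
    }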
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 5f14c6b..903d2f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -158,6 +158,7 @@ TOK_ALTERTABLE_UNARCHIVE;
 TOK_ALTERTABLE_SERDEPROPERTIES;
 TOK_ALTERTABLE_SERIALIZER;
 TOK_ALTERTABLE_UPDATECOLSTATS;
+TOK_ALTERTABLE_UPDATESTATS;
 TOK_TABLE_PARTITION;
 TOK_ALTERTABLE_FILEFORMAT;
 TOK_ALTERTABLE_LOCATION;
@@ -1016,6 +1017,7 @@ alterTableStatementSuffix
     | alterStatementSuffixArchive
     | alterStatementSuffixUnArchive
     | alterStatementSuffixProperties
+    | alterStatementSuffixUpdateStats
     | alterStatementSuffixSkewedby
     | alterStatementSuffixExchangePartition
     | alterStatementPartitionKeyType
@@ -1035,6 +1037,7 @@ alterTblPartitionStatementSuffix
   | alterStatementSuffixClusterbySortby
   | alterStatementSuffixCompact
   | alterStatementSuffixUpdateStatsCol
+  | alterStatementSuffixUpdateStats
   | alterStatementSuffixRenameCol
   | alterStatementSuffixAddCol
   ;
@@ -1121,6 +1124,13 @@ alterStatementSuffixUpdateStatsCol
     ->^(TOK_ALTERTABLE_UPDATECOLSTATS $colName tableProperties $comment?)
     ;
 
+alterStatementSuffixUpdateStats
+@init { pushMsg("update basic statistics", state); }
+@after { popMsg(state); }
+  : KW_UPDATE KW_STATISTICS KW_SET tableProperties
+  ->^(TOK_ALTERTABLE_UPDATESTATS tableProperties)
+  ;
+
 alterStatementChangeColPosition
     : first=KW_FIRST|KW_AFTER afterCol=identifier
     ->{$first != null}? ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION )
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
index 98860c6..7909e70 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
@@ -152,6 +152,9 @@
     tablePartitionCommandType.put(HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS,
         new HiveOperation[] {HiveOperation.ALTERTABLE_UPDATETABLESTATS,
             HiveOperation.ALTERTABLE_UPDATEPARTSTATS});
+    tablePartitionCommandType.put(HiveParser.TOK_ALTERTABLE_UPDATESTATS,
+        new HiveOperation[] {HiveOperation.ALTERTABLE_UPDATETABLESTATS,
+            HiveOperation.ALTERTABLE_UPDATEPARTSTATS});
   }
 
   public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
index 2dabce2..3280475 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
@@ -177,15 +177,16 @@ public AlterTableDesc(String name, HashMap<String, String> partSpec, List<FieldSchema> newCols,
-  public AlterTableDesc(AlterTableTypes alterType, boolean expectView) {
+  public AlterTableDesc(AlterTableTypes alterType, HashMap<String, String> partSpec, boolean expectView) {
     op = alterType;
+    this.partSpec = partSpec;
     this.expectView = expectView;
   }
diff --git a/ql/src/test/queries/clientnegative/updateBasicStats.q b/ql/src/test/queries/clientnegative/updateBasicStats.q
new file mode 100644
index 0000000..b9e642d
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/updateBasicStats.q
@@ -0,0 +1,5 @@
+set hive.mapred.mode=nonstrict;
+
+create table s as select * from src limit 10;
+
+alter table s update statistics set ('numRows'='NaN');
diff --git a/ql/src/test/queries/clientpositive/updateBasicStats.q b/ql/src/test/queries/clientpositive/updateBasicStats.q
new file mode 100644
index 0000000..daa8029
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/updateBasicStats.q
@@ -0,0 +1,54 @@
+set hive.mapred.mode=nonstrict;
+
+create table s as select * from src limit 10;
+
+explain select * from s;
+
+alter table s update statistics set('numRows'='12');
+
+explain select * from s;
+
+analyze table s compute statistics;
+
+explain select * from s;
+
+alter table s update statistics set('numRows'='1212', 'rawDataSize'='500500');
+
+explain select * from s;
+
+CREATE TABLE calendarp (`year` int) partitioned by (p int);
+
+insert into table calendarp partition (p=1) values (2010), (2011), (2012);
+
+explain select * from calendarp where p=1;
+
+alter table calendarp partition (p=1) update statistics set('numRows'='1000020000', 'rawDataSize'='300040000');
+
+explain select * from calendarp where p=1;
+
+create table src_stat_part_two(key string, value string) partitioned by (px int, py string);
+
+insert overwrite table src_stat_part_two partition (px=1, py='a')
+ select * from src limit 1;
+
+insert overwrite table src_stat_part_two partition (px=1, py='b')
+ select * from src limit 10;
+
+insert overwrite table src_stat_part_two partition (px=2, py='b')
+ select * from src limit 100;
+
+explain select * from src_stat_part_two where px=1 and py='a';
+
+explain select * from src_stat_part_two where px=1;
+
+alter table src_stat_part_two partition (px=1, py='a') update statistics set('numRows'='1000020000', 'rawDataSize'='300040000');
+
+explain select * from src_stat_part_two where px=1 and py='a';
+
+explain select * from src_stat_part_two where px=1;
+
+alter table src_stat_part_two partition (px=1) update statistics set('numRows'='1000020000', 'rawDataSize'='300040000');
+
+explain select * from src_stat_part_two where px=1 and py='a';
+
+explain select * from src_stat_part_two where px=1;
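SemanticAnalyzerFactory maps the single TOK_ALTERTABLE_UPDATESTATS token to ALTERTABLE_UPDATETABLESTATS or ALTERTABLE_UPDATEPARTSTATS depending on whether the statement carries a PARTITION clause; the PREHOOK/POSTHOOK type lines in the golden outputs below show both resolutions. A minimal sketch of that two-slot lookup (the enum and map are simplified stand-ins for HiveOperation and the factory's static table):

    import java.util.HashMap;
    import java.util.Map;

    public class CommandTypeSketch {
      enum Op { ALTERTABLE_UPDATETABLESTATS, ALTERTABLE_UPDATEPARTSTATS }

      // Index 0 is used when no partition spec is present, index 1 when one is.
      static final Map<String, Op[]> TABLE_PARTITION_COMMAND_TYPE = new HashMap<>();
      static {
        TABLE_PARTITION_COMMAND_TYPE.put("TOK_ALTERTABLE_UPDATESTATS",
            new Op[] { Op.ALTERTABLE_UPDATETABLESTATS, Op.ALTERTABLE_UPDATEPARTSTATS });
      }

      static Op resolve(String token, boolean hasPartitionSpec) {
        return TABLE_PARTITION_COMMAND_TYPE.get(token)[hasPartitionSpec ? 1 : 0];
      }

      public static void main(String[] args) {
        System.out.println(resolve("TOK_ALTERTABLE_UPDATESTATS", false)); // table-level
        System.out.println(resolve("TOK_ALTERTABLE_UPDATESTATS", true));  // partition-level
      }
    }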
diff --git a/ql/src/test/results/clientnegative/updateBasicStats.q.out b/ql/src/test/results/clientnegative/updateBasicStats.q.out
new file mode 100644
index 0000000..3c4fe39
--- /dev/null
+++ b/ql/src/test/results/clientnegative/updateBasicStats.q.out
@@ -0,0 +1,11 @@
+PREHOOK: query: create table s as select * from src limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s
+POSTHOOK: query: create table s as select * from src limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s
+FAILED: SemanticException AlterTable numRows failed with value NaN
diff --git a/ql/src/test/results/clientpositive/updateBasicStats.q.out b/ql/src/test/results/clientpositive/updateBasicStats.q.out
new file mode 100644
index 0000000..3f04b99
--- /dev/null
+++ b/ql/src/test/results/clientpositive/updateBasicStats.q.out
@@ -0,0 +1,377 @@
+PREHOOK: query: create table s as select * from src limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s
+POSTHOOK: query: create table s as select * from src limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s
+PREHOOK: query: explain select * from s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: s
+          Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: alter table s update statistics set('numRows'='12')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@s
+PREHOOK: Output: default@s
+POSTHOOK: query: alter table s update statistics set('numRows'='12')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@s
+POSTHOOK: Output: default@s
+PREHOOK: query: explain select * from s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: s
+          Statistics: Num rows: 12 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 12 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: analyze table s compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s
+PREHOOK: Output: default@s
+POSTHOOK: query: analyze table s compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s
+POSTHOOK: Output: default@s
+PREHOOK: query: explain select * from s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: s
+          Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: alter table s update statistics set('numRows'='1212', 'rawDataSize'='500500')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@s
+PREHOOK: Output: default@s
+POSTHOOK: query: alter table s update statistics set('numRows'='1212', 'rawDataSize'='500500')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@s
+POSTHOOK: Output: default@s
+PREHOOK: query: explain select * from s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: s
+          Statistics: Num rows: 1212 Data size: 500500 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1212 Data size: 500500 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: CREATE TABLE calendarp (`year` int) partitioned by (p int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@calendarp
+POSTHOOK: query: CREATE TABLE calendarp (`year` int) partitioned by (p int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@calendarp
+PREHOOK: query: insert into table calendarp partition (p=1) values (2010), (2011), (2012)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@calendarp@p=1
+POSTHOOK: query: insert into table calendarp partition (p=1) values (2010), (2011), (2012)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@calendarp@p=1
+POSTHOOK: Lineage: calendarp PARTITION(p=1).year EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: explain select * from calendarp where p=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from calendarp where p=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: calendarp
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: year (type: int), 1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: alter table calendarp partition (p=1) update statistics set('numRows'='1000020000', 'rawDataSize'='300040000')
+PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+PREHOOK: Input: default@calendarp
+PREHOOK: Output: default@calendarp@p=1
+POSTHOOK: query: alter table calendarp partition (p=1) update statistics set('numRows'='1000020000', 'rawDataSize'='300040000')
+POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+POSTHOOK: Input: default@calendarp
+POSTHOOK: Input: default@calendarp@p=1
+POSTHOOK: Output: default@calendarp@p=1
+PREHOOK: query: explain select * from calendarp where p=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from calendarp where p=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: calendarp
+          Statistics: Num rows: 1000020000 Data size: 300040000 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: year (type: int), 1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1000020000 Data size: 300040000 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_stat_part_two
+POSTHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_stat_part_two
+PREHOOK: query: insert overwrite table src_stat_part_two partition (px=1, py='a')
+ select * from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src_stat_part_two@px=1/py=a
+POSTHOOK: query: insert overwrite table src_stat_part_two partition (px=1, py='a')
+ select * from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_stat_part_two@px=1/py=a
+POSTHOOK: Lineage: src_stat_part_two PARTITION(px=1,py=a).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_stat_part_two PARTITION(px=1,py=a).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table src_stat_part_two partition (px=1, py='b')
+ select * from src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src_stat_part_two@px=1/py=b
+POSTHOOK: query: insert overwrite table src_stat_part_two partition (px=1, py='b')
+ select * from src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_stat_part_two@px=1/py=b
+POSTHOOK: Lineage: src_stat_part_two PARTITION(px=1,py=b).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_stat_part_two PARTITION(px=1,py=b).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table src_stat_part_two partition (px=2, py='b')
+ select * from src limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src_stat_part_two@px=2/py=b
+POSTHOOK: query: insert overwrite table src_stat_part_two partition (px=2, py='b')
+ select * from src limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_stat_part_two@px=2/py=b
+POSTHOOK: Lineage: src_stat_part_two PARTITION(px=2,py=b).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_stat_part_two PARTITION(px=2,py=b).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain select * from src_stat_part_two where px=1 and py='a'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src_stat_part_two where px=1 and py='a'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src_stat_part_two
+          Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), 1 (type: int), 'a' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: explain select * from src_stat_part_two where px=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src_stat_part_two where px=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src_stat_part_two
+          Statistics: Num rows: 11 Data size: 115 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), 1 (type: int), py (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 11 Data size: 115 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: alter table src_stat_part_two partition (px=1, py='a') update statistics set('numRows'='1000020000', 'rawDataSize'='300040000')
+PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+PREHOOK: Input: default@src_stat_part_two
+PREHOOK: Output: default@src_stat_part_two@px=1/py=a
+POSTHOOK: query: alter table src_stat_part_two partition (px=1, py='a') update statistics set('numRows'='1000020000', 'rawDataSize'='300040000')
+POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+POSTHOOK: Input: default@src_stat_part_two
+POSTHOOK: Input: default@src_stat_part_two@px=1/py=a
+POSTHOOK: Output: default@src_stat_part_two@px=1/py=a
+PREHOOK: query: explain select * from src_stat_part_two where px=1 and py='a'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src_stat_part_two where px=1 and py='a'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src_stat_part_two
+          Statistics: Num rows: 1000020000 Data size: 300040000 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), 1 (type: int), 'a' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1000020000 Data size: 300040000 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: explain select * from src_stat_part_two where px=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src_stat_part_two where px=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src_stat_part_two
+          Statistics: Num rows: 1000020010 Data size: 300040104 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), 1 (type: int), py (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1000020010 Data size: 300040104 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: alter table src_stat_part_two partition (px=1) update statistics set('numRows'='1000020000', 'rawDataSize'='300040000')
+PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+PREHOOK: Input: default@src_stat_part_two
+PREHOOK: Output: default@src_stat_part_two@px=1/py=a
+PREHOOK: Output: default@src_stat_part_two@px=1/py=b
+POSTHOOK: query: alter table src_stat_part_two partition (px=1) update statistics set('numRows'='1000020000', 'rawDataSize'='300040000')
+POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+POSTHOOK: Input: default@src_stat_part_two
+POSTHOOK: Input: default@src_stat_part_two@px=1/py=a
+POSTHOOK: Input: default@src_stat_part_two@px=1/py=b
+POSTHOOK: Output: default@src_stat_part_two@px=1/py=a
+POSTHOOK: Output: default@src_stat_part_two@px=1/py=b
+PREHOOK: query: explain select * from src_stat_part_two where px=1 and py='a'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src_stat_part_two where px=1 and py='a'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src_stat_part_two
+          Statistics: Num rows: 1000020000 Data size: 300040000 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), 1 (type: int), 'a' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1000020000 Data size: 300040000 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: explain select * from src_stat_part_two where px=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src_stat_part_two where px=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src_stat_part_two
+          Statistics: Num rows: 2000040000 Data size: 600080000 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: string), value (type: string), 1 (type: int), py (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 2000040000 Data size: 600080000 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+