diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 2ff76ee..7854571 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -20,6 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 
+import java.util.List;
 import java.util.Map;
 
@@ -113,14 +114,55 @@ public String getAggregator(Configuration conf) {
 
   // This string constant will be persisted in metastore to indicate whether corresponding
   // table or partition's statistics are accurate or not.
+  public static final String TBL_OR_PART_STATS_ACCURATE = "TBL_OR_PART_STATS_ACCURATE";
+
+  // This string constant will be persisted in metastore to indicate whether corresponding
+  // table or partition's column statistics are accurate or not.
   public static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE";
+
   public static final String TRUE = "true";
 
   public static final String FALSE = "false";
 
-  public static boolean areStatsUptoDate(Map<String, String> params) {
-    String statsAcc = params.get(COLUMN_STATS_ACCURATE);
+  public static boolean areTblOrPartStatsUptoDate(Map<String, String> params) {
+    String statsAcc = params.get(TBL_OR_PART_STATS_ACCURATE);
     return statsAcc == null ? false : statsAcc.equals(TRUE);
   }
+
+  public static boolean areColumnStatsUptoDate(Map<String, String> params, String colName) {
+    String statsAcc = params.get(COLUMN_STATS_ACCURATE);
+    return statsAcc == null ? false : containsEncodedName(statsAcc, colName);
+  }
+
+  public static String encodeColumnName(String name) {
+    // The encoding is simple: keep letters, digits and '_' as-is, and replace
+    // every other (special) character with its ASCII code, delimited by '-'.
+    String ret = "";
+    for (char ch : name.toCharArray()) {
+      if (Character.isLetterOrDigit(ch) || ch == '_') {
+        ret += ch;
+      } else {
+        ret += "-" + (int) ch + "-";
+      }
+    }
+    return ret;
+  }
+
+  public static String encodeColumnNames(List<String> names) {
+    StringBuffer stringBuffer = new StringBuffer();
+    for (String name : names) {
+      if (stringBuffer.length() != 0) {
+        stringBuffer.append(",");
+        stringBuffer.append(encodeColumnName(name));
+      } else {
+        stringBuffer.append(encodeColumnName(name));
+      }
+    }
+    return stringBuffer.toString();
+  }
+
+  public static boolean containsEncodedName(String colNames, String colName) {
+    return colNames.contains(encodeColumnName(colName));
+  }
 }
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 23068f8..3b49ea9 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -236,10 +236,10 @@ public static boolean updateTableStatsFast(Table tbl,
       for (String stat : StatsSetupConst.statsRequireCompute) {
         params.put(stat, "-1");
       }
-      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.FALSE);
+      params.put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, StatsSetupConst.FALSE);
     } else {
       params.remove(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK);
-      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.TRUE);
+      params.put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, StatsSetupConst.TRUE);
     }
   }
   tbl.setParameters(params);
@@ -357,10 +357,10 @@ public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator
       for (String stat : StatsSetupConst.statsRequireCompute) {
         params.put(stat, "-1");
       }
-      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.FALSE);
+      params.put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, StatsSetupConst.FALSE);
     } else {
       params.remove(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK);
-      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.TRUE);
+      params.put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, StatsSetupConst.TRUE);
     }
   }
   part.setParameters(params);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index f6fbe74..517e345 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -22,13 +22,21 @@
 import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
+
+import jodd.io.FileUtil;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
@@ -42,14 +50,18 @@
 import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.PartitionSpec;
 import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -292,17 +304,19 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
     }
   }
 
-  private List<ColumnStatistics> constructColumnStatsFromPackedRows() throws HiveException, MetaException, IOException {
+  private List<ColumnStatistics> constructColumnStatsFromPackedRows() throws HiveException, MetaException, IOException, InvalidOperationException {
     String currentDb = SessionState.get().getCurrentDatabase();
     String tableName = work.getColStats().getTableName();
     String partName = null;
+    Map<String, String> spec = null;
    List<String> colName = work.getColStats().getColName();
    List<String> colType = work.getColStats().getColType();
    boolean isTblLevel = work.getColStats().isTblLevel();
    List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
    InspectableObject packedRow;
+    Table tbl = db.getTable(currentDb,tableName);
    while ((packedRow = ftOp.getNextRow()) != null) {
      if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new HiveException("Unexpected object type encountered while unpacking row");
@@ -313,7 +327,7 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
 
-      Table tbl = db.getTable(currentDb,tableName);
+
      List<FieldSchema> partColSchema = tbl.getPartCols();
      // Partition columns are appended at end, we only care about stats column
      int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
@@ -339,6 +353,7 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
            this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
        }
        partName = Warehouse.makePartName(partColSchema, partVals);
+        spec = Warehouse.makeSpecFromValues(partColSchema, partVals);
      }
      String [] names = Utilities.getDbTableName(currentDb, tableName);
      ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel);
@@ -346,6 +361,21 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
      colStats.setStatsDesc(statsDesc);
      colStats.setStatsObj(statsObjs);
      stats.add(colStats);
+
+    }
+    // This is the only way to tell statsOptimizer that column stats are
+    // accurate
+    if (isTblLevel) {
+      Map<String, String> params = tbl.getParameters();
+      params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.encodeColumnNames(colName));
+      tbl.setParameters(params);
+      db.alterTable(tableName, tbl);
+    } else {
+      Partition part = db.getPartition(tbl, spec, false);
+      part.getParameters().put(StatsSetupConst.COLUMN_STATS_ACCURATE,
+          StatsSetupConst.encodeColumnNames(colName));
+      db.alterPartition(tbl.getDbName(), tbl.getTableName(),
+          new Partition(tbl, part.getTPartition()));
    }
    ftOp.clearFetchContext();
    return stats;
@@ -367,7 +397,7 @@ private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName,
    return statsDesc;
  }
 
-  private int persistPartitionStats() throws HiveException, MetaException, IOException {
+  private int persistPartitionStats() throws HiveException, MetaException, IOException, InvalidOperationException {
    // Fetch result of the analyze table partition (p1=c1).. compute statistics for columns ..
    // Construct a column statistics object from the result
@@ -377,7 +407,7 @@ private int persistPartitionStats() throws HiveException, MetaException, IOException {
    return 0;
  }
 
-  private int persistTableStats() throws HiveException, MetaException, IOException {
+  private int persistTableStats() throws HiveException, MetaException, IOException, InvalidOperationException {
    // Fetch result of the analyze table .. compute statistics for columns ..
    // Construct a column statistics object from the result
    ColumnStatistics colStats = constructColumnStatsFromPackedRows().get(0);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 4fb6c00..d88c135 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -4232,7 +4232,7 @@ private boolean needToUpdateStats(Map<String, String> props) {
      if (statVal != null && Long.parseLong(statVal) > 0) {
        statsPresent = true;
        props.put(stat, "0");
-        props.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "false");
+        props.put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, "false");
      }
    }
    return statsPresent;
  }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index c682df2..c14158c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -524,7 +524,7 @@ public void createTable(String tableName, List<String> columns, List<String> partCols,
    tbl.setNumBuckets(bucketCount);
    tbl.setBucketCols(bucketCols);
    if (parameters != null) {
-      tbl.setParamters(parameters);
+      tbl.setParameters(parameters);
    }
    createTable(tbl);
  }
@@ -1478,6 +1478,8 @@ public Partition loadPartition(Path loadPath, Table tbl,
      newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(), inheritTableSpecs,
          newFiles);
+      // column stats will be inaccurate
+      newTPart.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
 
      // recreate the partition if it existed before
      if (isSkewedStoreAsSubdir) {
@@ -1490,7 +1492,7 @@ public Partition loadPartition(Path loadPath, Table tbl,
        skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
        newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
        if(!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-          newTPart.getParameters().put(StatsSetupConst.COLUMN_STATS_ACCURATE, "false");
+          newTPart.getParameters().put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, "false");
        }
        alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newCreatedTpart));
        newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(), inheritTableSpecs,
@@ -1498,7 +1500,7 @@ public Partition loadPartition(Path loadPath, Table tbl,
        return new Partition(tbl, newCreatedTpart);
      }
      if(!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-        newTPart.getParameters().put(StatsSetupConst.COLUMN_STATS_ACCURATE, "false");
+        newTPart.getParameters().put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, "false");
        alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newTPart.getTPartition()));
      }
    } catch (IOException e) {
@@ -1730,11 +1732,14 @@ public void loadTable(Path loadPath, String tableName, boolean replace,
      }
    }
    if(!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-      tbl.getParameters().put(StatsSetupConst.COLUMN_STATS_ACCURATE, "false");
+      tbl.getParameters().put(StatsSetupConst.TBL_OR_PART_STATS_ACCURATE, "false");
    } else {
      tbl.getParameters().put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, "true");
    }
+    // column stats will be inaccurate
+    tbl.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
+
    try {
      if (isSkewedStoreAsSubdir) {
        SkewedInfo skewedInfo = tbl.getSkewedInfo();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index d2a5948..caddfb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -382,7 +382,7 @@ public void setProperty(String name, String value) {
    tTable.getParameters().put(name, value);
  }
 
-  public void setParamters(Map<String, String> params) {
+  public void setParameters(Map<String, String> params) {
    tTable.setParameters(params);
  }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index ffe706e..dedcbc4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -332,7 +332,7 @@ else if (udaf instanceof GenericUDAFCount) {
          String colName = desc.getColumn();
          StatType type = getType(desc.getTypeString());
          if(!tbl.isPartitioned()) {
-            if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+            if (!StatsSetupConst.areTblOrPartStatsUptoDate(tbl.getParameters())) {
              Logger.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
              return null;
            }
@@ -341,6 +341,11 @@ else if (udaf instanceof GenericUDAFCount) {
              Logger.debug("Table doesn't have upto date stats " + tbl.getTableName());
              return null;
            }
+            if (!StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), colName)) {
+              Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName +
+                  " are not upto date.");
+              return null;
+            }
            List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
            if (stats.isEmpty()) {
@@ -359,7 +364,7 @@ else if (udaf instanceof GenericUDAFCount) {
            Set<Partition> parts = pctx.getPrunedPartitions(
                tsOp.getConf().getAlias(), tsOp).getPartitions();
            for (Partition part : parts) {
-              if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+              if (!StatsSetupConst.areTblOrPartStatsUptoDate(part.getParameters())) {
                Logger.debug("Stats for part : " + part.getSpec() + " are not upto date.");
                return null;
              }
@@ -372,7 +377,7 @@ else if (udaf instanceof GenericUDAFCount) {
              rowCnt += partRowCnt;
            }
            Collection<List<ColumnStatisticsObj>> result =
-                verifyAndGetPartStats(hive, tbl, colName, parts);
+                verifyAndGetPartColumnStats(hive, tbl, colName, parts);
            if (result == null) {
              return null; // logging inside
            }
@@ -396,8 +401,9 @@ else if (udaf instanceof GenericUDAFCount) {
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if(!tbl.isPartitioned()) {
-              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
-                Logger.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+              if (!StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), colName)) {
+                Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName +
+                    " are not upto date.");
                return null;
              }
              List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
@@ -445,7 +451,7 @@ else if (udaf instanceof GenericUDAFCount) {
            Long maxVal = null;
            Collection<List<ColumnStatisticsObj>> result =
-                verifyAndGetPartStats(hive, tbl, colName, parts);
+                verifyAndGetPartColumnStats(hive, tbl, colName, parts);
            if (result == null) {
              return null; // logging inside
            }
@@ -471,7 +477,7 @@ else if (udaf instanceof GenericUDAFCount) {
            Double maxVal = null;
            Collection<List<ColumnStatisticsObj>> result =
-                verifyAndGetPartStats(hive, tbl, colName, parts);
+                verifyAndGetPartColumnStats(hive, tbl, colName, parts);
            if (result == null) {
              return null; // logging inside
            }
@@ -503,8 +509,9 @@ else if (udaf instanceof GenericUDAFCount) {
            String colName = colDesc.getColumn();
            StatType type = getType(colDesc.getTypeString());
            if (!tbl.isPartitioned()) {
-              if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
-                Logger.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
+              if (!StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), colName)) {
+                Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName +
+                    " are not upto date.");
                return null;
              }
              ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
@@ -546,7 +553,7 @@ else if (udaf instanceof GenericUDAFCount) {
            Long minVal = null;
            Collection<List<ColumnStatisticsObj>> result =
-                verifyAndGetPartStats(hive, tbl, colName, parts);
+                verifyAndGetPartColumnStats(hive, tbl, colName, parts);
            if (result == null) {
              return null; // logging inside
            }
@@ -572,7 +579,7 @@ else if (udaf instanceof GenericUDAFCount) {
            Double minVal = null;
            Collection<List<ColumnStatisticsObj>> result =
-                verifyAndGetPartStats(hive, tbl, colName, parts);
+                verifyAndGetPartColumnStats(hive, tbl, colName, parts);
            if (result == null) {
              return null; // logging inside
            }
@@ -661,12 +668,13 @@ private ColumnStatisticsData validateSingleColStat(List<ColumnStatisticsObj> statObj) {
    return statObj.get(0).getStatsData();
  }
 
-  private Collection<List<ColumnStatisticsObj>> verifyAndGetPartStats(
+  private Collection<List<ColumnStatisticsObj>> verifyAndGetPartColumnStats(
      Hive hive, Table tbl, String colName, Set<Partition> parts) throws TException {
    List<String> partNames = new ArrayList<String>(parts.size());
    for (Partition part : parts) {
-      if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
-        Logger.debug("Stats for part : " + part.getSpec() + " are not upto date.");
+      if (!StatsSetupConst.areColumnStatsUptoDate(part.getParameters(), colName)) {
+        Logger.debug("Stats for part : " + part.getSpec() + " column " + colName +
+            " are not upto date.");
        return null;
      }
      partNames.add(part.getName());
@@ -686,7 +694,7 @@ private Long getRowCnt(
    if (tbl.isPartitioned()) {
      for (Partition part : pctx.getPrunedPartitions(
          tsOp.getConf().getAlias(), tsOp).getPartitions()) {
-        if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
+        if (!StatsSetupConst.areTblOrPartStatsUptoDate(part.getParameters())) {
          return null;
        }
        long partRowCnt = Long.parseLong(part.getParameters().get(StatsSetupConst.ROW_COUNT));
@@ -697,7 +705,7 @@ private Long getRowCnt(
        rowCnt += partRowCnt;
      }
    } else { // unpartitioned table
-      if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
+      if (!StatsSetupConst.areTblOrPartStatsUptoDate(tbl.getParameters())) {
        return null;
      }
      rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
diff --git a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer.q b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer.q
new file mode 100644
index 0000000..731caaf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer.q
@@ -0,0 +1,76 @@
+set hive.stats.fetch.column.stats=true;
+set hive.stats.fetch.partition.stats=true;
+set hive.compute.query.using.stats=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE calendar (year int, month int) clustered by (month) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into calendar values (2010, 10), (2011, 11), (2012, 12);
+
+explain select max(year) from calendar;
+
+select max(year) from calendar;
+
+select max(month) from calendar;
+
+analyze table calendar compute statistics for columns;
+
+explain select max(year) from calendar;
+
+select
max(year) from calendar; + +explain select max(month) from calendar; + +select max(month) from calendar; + +insert into calendar values (2015, 15); + +explain select max(year) from calendar; + +select max(year) from calendar; + +explain select max(month) from calendar; + +select max(month) from calendar; + + +analyze table calendar compute statistics for columns; + +explain select max(year), min(month) from calendar; + +select max(year), min(month) from calendar; + +update calendar set year=2018 where month=15; + +explain select max(year) from calendar; + +explain select min(month) from calendar; + +explain select max(year), min(month) from calendar; + +select max(year), min(month) from calendar; + + +CREATE TABLE calendarp (`year` int) partitioned by (p int); + +insert into table calendarp partition (p=1) values (2010), (2011), (2012); + +explain select max(year) from calendarp where p=1; + +select max(year) from calendarp where p=1; + +analyze table calendarp partition (p=1) compute statistics for columns; + +explain select max(year) from calendarp where p=1; + +insert into table calendarp partition (p=1) values (2015); + +explain select max(year) from calendarp where p=1; + +select max(year) from calendarp where p=1; + + + diff --git a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer.q.out new file mode 100644 index 0000000..5fdd623 --- /dev/null +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer.q.out @@ -0,0 +1,635 @@ +PREHOOK: query: CREATE TABLE calendar (year int, month int) clustered by (month) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@calendar +POSTHOOK: query: CREATE TABLE calendar (year int, month int) clustered by (month) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@calendar +PREHOOK: query: insert into calendar values (2010, 10), (2011, 11), (2012, 12) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@calendar +POSTHOOK: query: insert into calendar values (2010, 10), (2011, 11), (2012, 12) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@calendar +POSTHOOK: Lineage: calendar.month EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: calendar.year EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain select max(year) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 3 Data size: 1242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 3 Data size: 1242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(year) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num 
rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(year) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +2012 +PREHOOK: query: select max(month) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(month) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +12 +PREHOOK: query: analyze table calendar compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: analyze table calendar compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +PREHOOK: query: explain select max(year) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(year) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +2012 +PREHOOK: query: explain select max(month) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(month) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select max(month) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(month) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +12 +PREHOOK: query: insert into calendar values (2015, 15) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@calendar +POSTHOOK: query: insert into calendar values (2015, 15) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@calendar +POSTHOOK: Lineage: calendar.month EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: calendar.year EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain select max(year) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from 
calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(year) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(year) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +2015 +PREHOOK: query: explain select max(month) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(month) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: month (type: int) + outputColumnNames: month + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(month) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(month) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(month) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +15 +PREHOOK: query: analyze 
table calendar compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: analyze table calendar compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +PREHOOK: query: explain select max(year), min(month) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year), min(month) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year), min(month) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(year), min(month) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +2015 10 +PREHOOK: query: explain update calendar set year=2018 where month=15 +PREHOOK: type: QUERY +POSTHOOK: query: explain update calendar set year=2018 where month=15 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Filter Operator + predicate: (month = 15) (type: boolean) + Select Operator + expressions: ROW__ID (type: struct) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), 2018 (type: int), 15 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.calendar + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.calendar + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: update calendar set year=2018 where month=15 +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +PREHOOK: Output: default@calendar +POSTHOOK: query: update calendar set year=2018 where month=15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +POSTHOOK: Output: default@calendar +PREHOOK: query: explain select max(year) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 676 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 676 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(year) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select min(month) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select min(month) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 676 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: month (type: int) + outputColumnNames: month + Statistics: Num rows: 676 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(month) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select max(year), min(month) from calendar +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year), min(month) from calendar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendar + Statistics: Num rows: 338 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: year (type: int), month (type: int) + outputColumnNames: year, month + Statistics: Num rows: 338 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(year), min(month) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year), min(month) from calendar +PREHOOK: type: QUERY +PREHOOK: Input: default@calendar +#### A masked pattern was here #### +POSTHOOK: query: select max(year), min(month) from calendar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendar +#### A masked pattern was here #### +2018 10 +PREHOOK: query: CREATE TABLE calendarp (`year` int) partitioned by (p int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@calendarp +POSTHOOK: query: CREATE TABLE calendarp (`year` int) partitioned by (p int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@calendarp +PREHOOK: query: insert into table calendarp partition (p=1) values (2010), (2011), (2012) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@calendarp@p=1 +POSTHOOK: query: insert into table calendarp partition (p=1) values (2010), (2011), (2012) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@calendarp@p=1 +POSTHOOK: Lineage: calendarp PARTITION(p=1).year EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain select max(year) from calendarp where p=1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from calendarp where p=1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: calendarp + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(year) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year) from calendarp where p=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@calendarp +PREHOOK: Input: default@calendarp@p=1 +#### A masked pattern was here #### +POSTHOOK: query: select max(year) from calendarp where p=1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendarp +POSTHOOK: Input: default@calendarp@p=1 +#### A masked pattern was here #### +2012 +PREHOOK: query: analyze table calendarp partition (p=1) 
compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@calendarp +PREHOOK: Input: default@calendarp@p=1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table calendarp partition (p=1) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendarp +POSTHOOK: Input: default@calendarp@p=1 +#### A masked pattern was here #### +PREHOOK: query: explain select max(year) from calendarp where p=1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from calendarp where p=1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: insert into table calendarp partition (p=1) values (2015) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@calendarp@p=1 +POSTHOOK: query: insert into table calendarp partition (p=1) values (2015) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@calendarp@p=1 +POSTHOOK: Lineage: calendarp PARTITION(p=1).year EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain select max(year) from calendarp where p=1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select max(year) from calendarp where p=1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select max(year) from calendarp where p=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@calendarp +#### A masked pattern was here #### +POSTHOOK: query: select max(year) from calendarp where p=1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@calendarp +#### A masked pattern was here #### +2012
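
For reference, the following is a minimal, self-contained sketch of the column-name encoding that the new StatsSetupConst helpers (encodeColumnName, encodeColumnNames, containsEncodedName) apply when the COLUMN_STATS_ACCURATE table/partition parameter is written by ColumnStatsTask and read back by StatsOptimizer. The class name and main() driver below are illustrative only and are not part of the patch; only the three static methods mirror the patched behaviour.

import java.util.Arrays;
import java.util.List;

public class ColumnNameEncodingDemo {

  // Keep letters, digits and '_' as-is; replace any other character with its
  // ASCII code wrapped in '-' delimiters, mirroring StatsSetupConst.encodeColumnName.
  static String encodeColumnName(String name) {
    StringBuilder ret = new StringBuilder();
    for (char ch : name.toCharArray()) {
      if (Character.isLetterOrDigit(ch) || ch == '_') {
        ret.append(ch);
      } else {
        ret.append('-').append((int) ch).append('-');
      }
    }
    return ret.toString();
  }

  // Comma-separated list of encoded names, as stored in COLUMN_STATS_ACCURATE.
  static String encodeColumnNames(List<String> names) {
    StringBuilder sb = new StringBuilder();
    for (String name : names) {
      if (sb.length() != 0) {
        sb.append(',');
      }
      sb.append(encodeColumnName(name));
    }
    return sb.toString();
  }

  // The containment test that areColumnStatsUptoDate applies to the stored value.
  static boolean containsEncodedName(String colNames, String colName) {
    return colNames.contains(encodeColumnName(colName));
  }

  public static void main(String[] args) {
    String stored = encodeColumnNames(Arrays.asList("year", "month"));
    System.out.println(stored);                               // year,month
    System.out.println(encodeColumnName("my col#1"));         // my-32-col-35-1
    System.out.println(containsEncodedName(stored, "year"));  // true
    System.out.println(containsEncodedName(stored, "day"));   // false
  }
}

Note that because the check reduces to a plain substring test on the stored value, a column whose encoded name is a substring of another analyzed column's encoded name (for example "mon" against "month") would also be reported as up to date; the analyze and load paths in this patch always rewrite or drop the whole parameter, so this remains a corner case rather than a correctness issue for the added tests.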