diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index a9e17c2..6fec9ac 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
@@ -152,6 +153,8 @@ public String getAggregator(Configuration conf) {
 
   public static final String FALSE = "false";
 
+  public static final String PENDING = "pending";
+
   // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output.
   public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
       COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
@@ -184,6 +187,24 @@ public Boolean deserialize(JsonParser jsonParser,
     }
   }
 
+  static class StringSerializer extends JsonSerializer<String> {
+
+    @Override
+    public void serialize(String value, JsonGenerator jsonGenerator,
+        SerializerProvider serializerProvider) throws IOException, JsonProcessingException {
+      jsonGenerator.writeString(value);
+    }
+  }
+
+  static class StringDeserializer extends JsonDeserializer<String> {
+
+    public String deserialize(JsonParser jsonParser,
+        DeserializationContext deserializationContext)
+        throws IOException, JsonProcessingException {
+      return jsonParser.getValueAsString();
+    }
+  }
+
   @JsonInclude(JsonInclude.Include.NON_DEFAULT)
   @JsonSerialize(using = BooleanSerializer.class)
   @JsonDeserialize(using = BooleanDeserializer.class)
@@ -192,9 +213,9 @@ public Boolean deserialize(JsonParser jsonParser,
 
   @JsonInclude(JsonInclude.Include.NON_EMPTY)
   @JsonProperty(COLUMN_STATS)
-  @JsonSerialize(contentUsing = BooleanSerializer.class)
-  @JsonDeserialize(contentUsing = BooleanDeserializer.class)
-  TreeMap<String, Boolean> columnStats = new TreeMap<>();
+  @JsonSerialize(contentUsing = StringSerializer.class)
+  @JsonDeserialize(contentUsing = StringDeserializer.class)
+  TreeMap<String, String> columnStats = new TreeMap<>();
 
 };
@@ -211,7 +232,16 @@ public static boolean areColumnStatsUptoDate(Map<String, String> params, String colName) {
     return false;
   }
   ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-  return stats.columnStats.containsKey(colName);
+  return TRUE.equals(stats.columnStats.get(colName));
+}
+
+public static boolean canColumnStatsMerge(Map<String, String> params, String colName) {
+  if (params == null) {
+    return false;
+  }
+  ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+  return TRUE.equals(stats.columnStats.get(colName))
+      || PENDING.equals(stats.columnStats.get(colName));
 }
 
 // It will only throw JSONException when stats.put(BASIC_STATS, TRUE)
@@ -243,9 +273,7 @@ public static void setColumnStatsState(Map<String, String> params, List<String> colNames) {
   ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
   for (String colName : colNames) {
-    if (!stats.columnStats.containsKey(colName)) {
-      stats.columnStats.put(colName, true);
-    }
+    stats.columnStats.put(colName, TRUE);
   }
   try {
     params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
@@ -254,13 +282,20 @@ public static void setColumnStatsState(Map<String, String> params, List<String> colNames) {
   }
 }
 
-public static void clearColumnStatsState(Map<String, String> params) {
+public static void clearColumnStatsState(Map<String, String> params,
+    boolean hasFollowingColumnStatsTaskNeedMerge) {
   if (params == null) {
     return;
   }
   ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-  stats.columnStats.clear();
-
+  if (!hasFollowingColumnStatsTaskNeedMerge) {
+    stats.columnStats.clear();
+  } else {
+    Set<String> colNames = stats.columnStats.keySet();
+    for (String colName : colNames) {
+      stats.columnStats.put(colName, PENDING);
+    }
+  }
   try {
     params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
   } catch (JsonProcessingException e) {
@@ -283,13 +318,17 @@ public static void removeColumnStatsState(Map<String, String> params, List<String> colNames) {
   }
 }
 
-public static void setBasicStatsStateForCreateTable(Map<String, String> params, String setting) {
+public static void setBasicStatsStateForCreateTable(Map<String, String> params,
+    List<String> cols, String setting) {
   if (TRUE.equals(setting)) {
     for (String stat : StatsSetupConst.supportedStats) {
       params.put(stat, "0");
     }
   }
   setBasicStatsState(params, setting);
+  if (cols != null) {
+    setColumnStatsState(params, cols);
+  }
 }
 
 private static ColumnStatsAccurate parseStatsAcc(String statsAcc) {
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java
index 76c1636..3803971 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java
@@ -103,7 +103,7 @@ protected void setUp() {
         db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true);
         db.createTable(src, cols, null, TextInputFormat.class,
             IgnoreKeyTextOutputFormat.class);
-        db.loadTable(hadoopDataFile[i], src, false, false, false, false, false);
+        db.loadTable(hadoopDataFile[i], src, false, false, false, false, false, false);
         i++;
       }
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 1b701e0..6d72202 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -69,6 +69,7 @@
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
 import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
 import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
@@ -1928,13 +1929,30 @@ public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld)
       ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index);
       ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName());
       if (statsObjOld != null) {
+        if (statsObjNew.getStatsData().getSetField() != statsObjOld.getStatsData().getSetField()) {
+          // Because we have already confirmed that the stats are accurate, it is
+          // impossible that the column type changed while the column stats
+          // stayed accurate.
+          throw new RuntimeException("Column " + statsObjNew.getColName()
+              + "'s old type is different from new type. "
+              + "We cannot merge stats in auto column stats gathering.");
+        }
         // If statsObjOld is found, we can merge.
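COLUMN_STATS_ACCURATE used to track each column with a boolean, so a column's stats were either accurate or absent. Storing strings instead adds a third state, "pending": a load invalidated the basic stats, but a following ColumnStatsTask is expected to merge fresh stats back in, so the old per-column entries are kept rather than cleared. The standalone sketch below (not part of the patch; the plain TreeMap stands in for the JSON-serialized ColumnStatsAccurate bean) mimics that lifecycle:

import java.util.Map;
import java.util.TreeMap;

// Sketch only: mirrors the tri-state per-column lifecycle that
// StatsSetupConst now keeps under COLUMN_STATS_ACCURATE.
public class ColumnStatsStateSketch {
  static final String TRUE = "true";
  static final String PENDING = "pending";

  // clearColumnStatsState(params, hasFollowingColumnStatsTaskNeedMerge):
  // forget the entries entirely, or downgrade them to "pending" so a
  // following merge is still allowed.
  static void clear(Map<String, String> columnStats, boolean needMerge) {
    if (!needMerge) {
      columnStats.clear();
    } else {
      for (Map.Entry<String, String> e : columnStats.entrySet()) {
        e.setValue(PENDING);
      }
    }
  }

  // canColumnStatsMerge accepts "true" and "pending";
  // areColumnStatsUptoDate accepts only "true".
  static boolean canMerge(Map<String, String> columnStats, String col) {
    String v = columnStats.get(col);
    return TRUE.equals(v) || PENDING.equals(v);
  }

  public static void main(String[] args) {
    Map<String, String> stats = new TreeMap<>();
    stats.put("c1", TRUE);
    clear(stats, true);                        // a merging stats task follows
    System.out.println(stats);                 // {c1=pending}
    System.out.println(canMerge(stats, "c1")); // true
  }
}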
        ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew,
            statsObjOld);
        merger.merge(statsObjNew, statsObjOld);
      }
+      // otherwise we can just insert the new statsObj
      list.add(statsObjNew);
     }
     csNew.setStatsObj(list);
   }
+
+  public static List<String> getColumnNames(List<FieldSchema> schema) {
+    List<String> cols = new ArrayList<>();
+    for (FieldSchema fs : schema) {
+      cols.add(fs.getName());
+    }
+    return cols;
+  }
 }
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
index da6cd46..84a5943 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
@@ -48,12 +48,8 @@ private static int countNumBitVectors(String s) {
   public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew,
       ColumnStatisticsObj statsObjOld) {
     ColumnStatsMerger agg;
-    _Fields typeNew = statsObjNew.getStatsData().getSetField();
-    _Fields typeOld = statsObjOld.getStatsData().getSetField();
-    // make sure that they have the same type
-    typeNew = typeNew == typeOld ? typeNew : null;
     int numBitVectors = 0;
-    switch (typeNew) {
+    switch (statsObjNew.getStatsData().getSetField()) {
     case BOOLEAN_STATS:
       agg = new BooleanColumnStatsMerger();
       break;
@@ -89,7 +85,7 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew,
       break;
     }
     default:
-      throw new RuntimeException("Woh, bad. Unknown stats type " + typeNew.toString());
+      throw new RuntimeException("Woh, bad. Unknown stats type " + statsObjNew.getStatsData().getSetField());
     }
     if (numBitVectors > 0) {
       agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
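The two changes above belong together. ColumnStatsMergerFactory used to compare the old and new thrift set fields itself, null out typeNew on a mismatch, and then switch on it, which produced a NullPointerException instead of a diagnosable error. The comparison now happens up front in MetaStoreUtils.mergeColStats, where a mismatch (only possible if the column's type changed) fails with an explicit message, so the factory can assume matching types. A standalone sketch of the guard (illustrative; a plain enum stands in for thrift's ColumnStatisticsData._Fields):

import java.util.Objects;

public class TypeGuardSketch {
  // Stand-in for the thrift union's set-field discriminator.
  enum StatsKind { BOOLEAN_STATS, LONG_STATS, DOUBLE_STATS, STRING_STATS, DECIMAL_STATS }

  static void checkMergeable(String col, StatsKind oldKind, StatsKind newKind) {
    if (!Objects.equals(oldKind, newKind)) {
      throw new RuntimeException("Column " + col + "'s old type is different from new type. "
          + "We cannot merge stats in auto column stats gathering.");
    }
  }

  public static void main(String[] args) {
    checkMergeable("a", StatsKind.LONG_STATS, StatsKind.LONG_STATS); // fine
    try {
      // e.g. after ALTER TABLE ... REPLACE COLUMNS turned a tinyint into a string
      checkMergeable("c1", StatsKind.LONG_STATS, StatsKind.STRING_STATS);
    } catch (RuntimeException expected) {
      System.out.println(expected.getMessage());
    }
  }
}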
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index d96f432..afe69a4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -26,6 +26,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -44,6 +45,7 @@
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
@@ -370,23 +372,55 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
       }
     }
 
+    List<ColumnStatisticsObj> prunedStatsObjs = new ArrayList<ColumnStatisticsObj>();
     if (!isTblLevel) {
       List<String> partVals = new ArrayList<String>();
       // Iterate over partition columns to figure out partition name
       for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
-        Object partVal = ((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()).
-            getPrimitiveJavaObject(list.get(i));
+        Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector())
+            .getPrimitiveJavaObject(list.get(i));
         partVals.add(partVal == null ?
             // could be null for default partition
-            this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
+            this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME)
+            : partVal.toString());
       }
       partName = Warehouse.makePartName(partColSchema, partVals);
+      if (work.getColStats().isNeedMerge()) {
+        // check all the columns stats
+        List<String> partNames = new ArrayList<>();
+        partNames.add(partName);
+        org.apache.hadoop.hive.ql.metadata.Partition partition = db
+            .getPartitionsByNames(tbl, partNames).iterator().next();
+        for (ColumnStatisticsObj statsObj : statsObjs) {
+          if (StatsSetupConst.canColumnStatsMerge(partition.getParameters(),
+              statsObj.getColName())) {
+            prunedStatsObjs.add(statsObj);
+          } else {
+            LOG.info("Skip merging column stats for " + statsObj.getColName());
+          }
+        }
+      } else {
+        prunedStatsObjs.addAll(statsObjs);
+      }
+    } else {
+      if (work.getColStats().isNeedMerge()) {
+        // check all the columns stats
+        for (ColumnStatisticsObj statsObj : statsObjs) {
+          if (StatsSetupConst.canColumnStatsMerge(tbl.getParameters(), statsObj.getColName())) {
+            prunedStatsObjs.add(statsObj);
+          } else {
+            LOG.info("Skip merging column stats for " + statsObj.getColName());
+          }
+        }
+      } else {
+        prunedStatsObjs.addAll(statsObjs);
+      }
     }
     String[] names = Utilities.getDbTableName(currentDb, tableName);
     ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel);
     ColumnStatistics colStats = new ColumnStatistics();
     colStats.setStatsDesc(statsDesc);
-    colStats.setStatsObj(statsObjs);
-    if (!statsObjs.isEmpty()) {
+    colStats.setStatsObj(prunedStatsObjs);
+    if (!colStats.getStatsObj().isEmpty()) {
       stats.add(colStats);
     }
   }
@@ -419,9 +453,7 @@ private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
       return 0;
     }
     SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
-    if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) {
-      request.setNeedMerge(true);
-    }
+    request.setNeedMerge(work.getColStats().isNeedMerge());
     db.setPartitionColumnStatistics(request);
     return 0;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 917e565..f9f0a50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -4525,7 +4525,7 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
     if (crtTbl.getLocation() == null && !tbl.isPartitioned() &&
         conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
       StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(),
-          StatsSetupConst.TRUE);
+          MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
     }
 
     // create the table
@@ -4765,7 +4765,7 @@ private boolean needToUpdateStats(Map<String, String> props, EnvironmentContext environmentContext) {
     StatsSetupConst.setBasicStatsState(props, StatsSetupConst.TRUE);
     environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
     //then invalidate column stats
-    StatsSetupConst.clearColumnStatsState(props);
+    StatsSetupConst.clearColumnStatsState(props, false);
 
     return statsPresent;
   }
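In merge mode, the ColumnStatsTask change above no longer persists every computed stats object: it first reads the target table's or partition's COLUMN_STATS_ACCURATE state and keeps only the columns still marked "true" or "pending", logging and dropping the rest (for example, columns whose type was just replaced). A standalone sketch of that filter step (illustrative names; the real code filters ColumnStatisticsObj against Table or Partition parameters):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Predicate;

public class PruneStatsSketch {
  // Keep a column only if the target's current state allows merging into it.
  static List<String> prune(List<String> cols, Predicate<String> canMerge) {
    List<String> pruned = new ArrayList<>();
    for (String col : cols) {
      if (canMerge.test(col)) {
        pruned.add(col);
      } else {
        System.out.println("Skip merging column stats for " + col);
      }
    }
    return pruned;
  }

  public static void main(String[] args) {
    Map<String, String> state = new TreeMap<>();
    state.put("a", "true");
    state.put("b", "pending");
    state.put("c", "false");
    Predicate<String> canMerge =
        col -> "true".equals(state.get(col)) || "pending".equals(state.get(col));
    System.out.println(prune(Arrays.asList("a", "b", "c"), canMerge)); // [a, b]
  }
}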
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index f329b51..0972b7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -24,8 +24,10 @@
 import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Queue;
 
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -238,6 +240,27 @@ public boolean hasFollowingStatsTask() {
     return false;
   }
 
+  public boolean hasFollowingColumnStatsTaskNeedMerge() {
+    Queue<Task<? extends Serializable>> tasks = new LinkedList<>();
+    tasks.offer(this);
+    Task<? extends Serializable> t = null;
+    while (!tasks.isEmpty()) {
+      t = tasks.poll();
+      if (t instanceof ColumnStatsTask) {
+        break;
+      } else if (t.getNumChild() != 0) {
+        for (Task<? extends Serializable> child : t.getChildTasks()) {
+          tasks.offer(child);
+        }
+      }
+    }
+    if (t instanceof ColumnStatsTask) {
+      return ((ColumnStatsTask) t).getWork().getColStats().isNeedMerge();
+    } else {
+      return false;
+    }
+  }
+
   @Override
   public int execute(DriverContext driverContext) {
 
@@ -245,6 +268,8 @@ public int execute(DriverContext driverContext) {
     if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
       return 0;
     }
+    boolean hasFollowingStatsTask = hasFollowingStatsTask();
+    boolean hasFollowingColumnStatsTaskNeedMerge = hasFollowingColumnStatsTaskNeedMerge();
     Hive db = getHive();
 
     // Do any hive related operations like moving tables and files
@@ -357,7 +382,7 @@ public int execute(DriverContext driverContext) {
           db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(),
              work.isSrcLocal(), isSkewedStoredAsDirs(tbd),
              work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID,
-              hasFollowingStatsTask());
+              hasFollowingStatsTask, hasFollowingColumnStatsTaskNeedMerge);
           if (work.getOutputs() != null) {
             DDLTask.addIfAbsentByName(new WriteEntity(table,
                 getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
@@ -434,7 +459,7 @@ public int execute(DriverContext driverContext) {
                 dpCtx.getNumDPCols(),
                 isSkewedStoredAsDirs(tbd),
                 work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID,
-                SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(),
+                SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask, hasFollowingColumnStatsTaskNeedMerge,
                 work.getLoadTableWork().getWriteType());
 
             // publish DP columns to its subscribers
@@ -500,7 +525,7 @@ public int execute(DriverContext driverContext) {
           db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(),
              tbd.getPartitionSpec(), tbd.getReplace(),
              tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(),
-              work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
+              work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask, hasFollowingColumnStatsTaskNeedMerge);
           Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
 
           if (bucketCols != null || sortCols != null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index ec36487..b7aa993 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1574,10 +1574,10 @@ public Database getDatabaseCurrent() throws HiveException {
   public void loadPartition(Path loadPath, String tableName,
       Map<String, String> partSpec, boolean replace,
       boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
-      boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
+      boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, boolean hasFollowingColumnStatsTaskNeedMerge) throws HiveException {
     Table tbl = getTable(tableName);
     loadPartition(loadPath, tbl, partSpec, replace, inheritTableSpecs,
-        isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask);
+        isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask, hasFollowingColumnStatsTaskNeedMerge);
   }
 
 /**
@@ -1604,7 +1604,7 @@ public void loadPartition(Path loadPath, String tableName,
   public Partition loadPartition(Path loadPath, Table tbl,
       Map<String, String> partSpec, boolean replace,
       boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
-      boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
+      boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, boolean hasFollowingColumnStatsTaskNeedMerge) throws HiveException {
     Path tblDataLocationPath = tbl.getDataLocation();
     try {
@@ -1668,7 +1668,7 @@ public Partition loadPartition(Path loadPath, Table tbl,
       }
 
       //column stats will be inaccurate
-      StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
+      StatsSetupConst.clearColumnStatsState(newTPart.getParameters(), hasFollowingColumnStatsTaskNeedMerge);
 
       // recreate the partition if it existed before
       if (isSkewedStoreAsSubdir) {
@@ -1685,11 +1685,6 @@ public Partition loadPartition(Path loadPath, Table tbl,
         StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
       }
       if (oldPart == null) {
-        newTPart.getTPartition().setParameters(new HashMap<String, String>());
-        if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-          StatsSetupConst.setBasicStatsStateForCreateTable(newTPart.getParameters(),
-              StatsSetupConst.TRUE);
-        }
         MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1,
             newPartPath.getFileSystem(conf)), newTPart.getParameters());
         try {
           LOG.debug("Adding new partition " + newTPart.getSpec());
@@ -1877,7 +1872,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
   public Map<Map<String, String>, Partition> loadDynamicPartitions(final Path loadPath,
       final String tableName, final Map<String, String> partSpec, final boolean replace,
       final int numDP, final boolean listBucketingEnabled, final boolean isAcid, final long txnId,
-      final boolean hasFollowingStatsTask, final AcidUtils.Operation operation)
+      final boolean hasFollowingStatsTask, final boolean hasFollowingColumnStatsTaskNeedMerge, final AcidUtils.Operation operation)
       throws HiveException {
 
     final Map<Map<String, String>, Partition> partitionsMap =
@@ -1921,7 +1916,7 @@ public Void call() throws Exception {
             // load the partition
             Partition newPartition = loadPartition(partPath, tbl, fullPartSpec,
                 replace, true, listBucketingEnabled,
-                false, isAcid, hasFollowingStatsTask);
+                false, isAcid, hasFollowingStatsTask, hasFollowingColumnStatsTaskNeedMerge);
             partitionsMap.put(fullPartSpec, newPartition);
 
             if (inPlaceEligible) {
@@ -2002,7 +1997,7 @@ public Void call() throws Exception {
    * @param isAcid true if this is an ACID based write
    */
   public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal,
-      boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask)
+      boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask, boolean hasFollowingColumnStatsTaskNeedMerge)
       throws HiveException {
 
     List<Path> newFiles = null;
@@ -2029,7 +2024,7 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal,
     }
 
     //column stats will be inaccurate
-    StatsSetupConst.clearColumnStatsState(tbl.getParameters());
+    StatsSetupConst.clearColumnStatsState(tbl.getParameters(), hasFollowingColumnStatsTaskNeedMerge);
 
     try {
       if (isSkewedStoreAsSubdir) {
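MoveTask decides up front whether column stats should be cleared outright or only downgraded to "pending": it breadth-first searches its child task graph for a ColumnStatsTask and, if one is reached, uses that task's needMerge flag; the search stops at the first ColumnStatsTask found. A minimal standalone sketch of the traversal (the Node type is an illustrative stand-in for Hive's Task<? extends Serializable>):

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

public class FindStatsTaskSketch {
  static class Node {
    final boolean isColumnStatsTask;
    final boolean needMerge;
    final List<Node> children;

    Node(boolean isColumnStatsTask, boolean needMerge, Node... children) {
      this.isColumnStatsTask = isColumnStatsTask;
      this.needMerge = needMerge;
      this.children = Arrays.asList(children);
    }
  }

  static boolean hasFollowingColumnStatsTaskNeedMerge(Node root) {
    Queue<Node> queue = new LinkedList<>();
    queue.offer(root);
    while (!queue.isEmpty()) {
      Node t = queue.poll();
      if (t.isColumnStatsTask) {
        return t.needMerge;   // the first stats task reached decides
      }
      queue.addAll(t.children);
    }
    return false;             // no ColumnStatsTask downstream
  }

  public static void main(String[] args) {
    Node statsTask = new Node(true, true);
    Node move = new Node(false, false, new Node(false, false, statsTask));
    System.out.println(hasFollowingColumnStatsTaskNeedMerge(move)); // true
  }
}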
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
index bff1688..f116fdb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
@@ -27,6 +27,7 @@
 import java.util.Map;
 import java.util.Properties;
 
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.StringInternUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -140,6 +141,11 @@ public Partition(Table tbl, Map<String, String> partSpec, Path location) throws HiveException {
       tpart.setSd(tbl.getSd().deepCopy());
       tpart.getSd().setLocation((location != null) ? location.toString() : null);
     }
+
+    Map<String, String> emptyParameters = new HashMap<String, String>();
+    tpart.setParameters(emptyParameters);
+    StatsSetupConst.setBasicStatsStateForCreateTable(tpart.getParameters(),
+        MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
     return tpart;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 0cf9205..b39f3e1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -2891,8 +2891,13 @@ private void analyzeAlterTableAddParts(String[] qualified, CommonTree ast, boolean expectView)
         if (desc.getPartParams() == null) {
           desc.setPartParams(new HashMap<String, String>());
         }
-        StatsSetupConst.setBasicStatsStateForCreateTable(desc.getPartParams(),
-            StatsSetupConst.TRUE);
+        if (desc.getCols() != null) {
+          StatsSetupConst.setBasicStatsStateForCreateTable(desc.getPartParams(),
+              MetaStoreUtils.getColumnNames(desc.getCols()), StatsSetupConst.TRUE);
+        } else {
+          StatsSetupConst.setBasicStatsStateForCreateTable(desc.getPartParams(),
+              MetaStoreUtils.getColumnNames(tab.getCols()), StatsSetupConst.TRUE);
+        }
       }
     }
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
index 97f323f..cf65bc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
@@ -30,6 +30,7 @@
   private static final long serialVersionUID = 1L;
   private boolean isTblLevel;
   private int numBitVector;
+  private boolean needMerge;
   private String tableName;
   private List<String> colName;
   private List<String> colType;
@@ -44,6 +45,7 @@ public ColumnStatsDesc(String tableName, List<String> colName, List<String> colType, boolean isTblLevel) {
     this.colType = colType;
     this.isTblLevel = isTblLevel;
     this.numBitVector = 0;
+    this.needMerge = false;
   }
 
   public ColumnStatsDesc(String tableName, List<String> colName,
@@ -53,6 +55,7 @@ public ColumnStatsDesc(String tableName, List<String> colName,
     this.colType = colType;
     this.isTblLevel = isTblLevel;
     this.numBitVector = numBitVector;
+    this.needMerge = this.numBitVector != 0;
   }
 
   @Explain(displayName = "Table")
@@ -95,8 +98,8 @@ public int getNumBitVector() {
     return numBitVector;
   }
 
-  public void setNumBitVector(int numBitVector) {
-    this.numBitVector = numBitVector;
+  public boolean isNeedMerge() {
+    return needMerge;
   }
 
 }
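Two threads of the patch meet in the three files above. First, a brand-new partition (Partition.java) or an ALTER TABLE ... ADD PARTITION (DDLSemanticAnalyzer.java) has zero rows, so zeroed basic stats and "accurate" column stats are both trivially correct, which is why setBasicStatsStateForCreateTable now also receives the column list. Second, ColumnStatsDesc replaces the numBitVector > 0 heuristic with an explicit needMerge flag, derived from numBitVector in the constructor and read through isNeedMerge(). A standalone sketch of what gets recorded for an empty object (constants inlined; flat keys stand in for the real JSON encoding under COLUMN_STATS_ACCURATE):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class CreateTableStatsSketch {
  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    List<String> cols = Arrays.asList("a", "b");
    // Basic stats: zero files/rows/bytes is exact for an empty table or partition.
    for (String stat : new String[] {"numFiles", "numRows", "rawDataSize", "totalSize"}) {
      params.put(stat, "0");
    }
    // Column stats: with zero rows, every column's stats are trivially accurate,
    // so each column can be marked "true" immediately.
    for (String col : cols) {
      params.put("COLUMN_STATS." + col, "true");
    }
    System.out.println(params);
  }
}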
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
index 4f614a8..e74b1f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
@@ -837,10 +838,10 @@ public Table toTable(HiveConf conf) throws HiveException {
     if (getLocation() == null && !this.isCTAS) {
       if (!tbl.isPartitioned() &&
           conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
         StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(),
-            StatsSetupConst.TRUE);
+            MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
       }
     } else {
-      StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(),
+      StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), null,
           StatsSetupConst.FALSE);
     }
     return tbl;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
index c7266bc..7a31f58 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
@@ -141,7 +141,7 @@
       db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true);
       db.createTable(src, cols, null, TextInputFormat.class,
           HiveIgnoreKeyTextOutputFormat.class);
-      db.loadTable(hadoopDataFile[i], src, false, true, false, false, false);
+      db.loadTable(hadoopDataFile[i], src, false, true, false, false, false, false);
       i++;
     }
diff --git a/ql/src/test/queries/clientpositive/alterColumnStats.q b/ql/src/test/queries/clientpositive/alterColumnStats.q
new file mode 100644
index 0000000..9e63343
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alterColumnStats.q
@@ -0,0 +1,60 @@
+set hive.mapred.mode=nonstrict;
+
+drop table p;
+
+set hive.stats.column.autogather=false;
+
+CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint);
+
+desc formatted p;
+
+insert into p values (1,22,333);
+
+desc formatted p;
+
+alter table p replace columns (insert_num int, c1 STRING, c2 STRING);
+
+set hive.stats.column.autogather=true;
+
+explain insert into p values (1,'2','3');
+
+insert into p values (1,'2','3');
+
+desc formatted p;
+
+desc formatted p c1;
+
+desc formatted p c2;
+
+
+drop table p;
+
+set hive.stats.column.autogather=true;
+
+CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint);
+
+desc formatted p;
+
+explain insert into p values (1,22,333);
+
+insert into p values (1,22,333);
+
+desc formatted p;
+
+desc formatted p c1;
+
+alter table p replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING);
+
+desc formatted p;
+
+explain insert into p values (1,'2','3','4');
+
+insert into p values (1,'2','3','4');
+
+desc formatted p;
+
+desc formatted p insert_num;
+
+desc formatted p c2;
+
+desc formatted p c3;
diff --git a/ql/src/test/queries/clientpositive/alterColumnStatsPart.q b/ql/src/test/queries/clientpositive/alterColumnStatsPart.q
new file mode 100644
index 0000000..8a47b4e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alterColumnStatsPart.q
@@ -0,0 +1,114 @@
+set hive.mapred.mode=nonstrict;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+drop table p;
+
+set hive.stats.column.autogather=false;
+
+CREATE TABLE p(a int, b int) partitioned by (c int);
+
+desc formatted p;
+
+insert into p partition (c=1) values (1,2);
+
+desc formatted p partition (c=1) a;
+
+explain select max(a) from p where c=1;
+
+analyze table p partition(c=1) compute statistics for columns a;
+
+explain select max(a) from p where c=1;
+
+insert into p partition (c) values (2,3,4);
+
+insert into p partition (c) values (4,5,6);
+
+desc formatted p partition(c=4);
+
+explain select max(a) from p where c=4;
+
+alter table p add partition (c=100);
+
+explain select max(a) from p where c=100;
+
+analyze table p partition(c=100) compute statistics for columns a;
+
+explain select max(a) from p where c=100;
+
+desc formatted p partition(c=100);
+
+insert into p partition (c=100) values (1,2);
+
+analyze table p partition(c=100) compute statistics for columns a;
+
+explain select max(a) from p where c=100;
+
+-- now test true
+set hive.stats.column.autogather=true;
+
+drop table p;
+
+CREATE TABLE p(a int, b int) partitioned by (c int);
+
+desc formatted p;
+
+insert into p partition (c=1) values (1,2);
+
+desc formatted p partition (c=1) a;
+
+explain select max(a) from p where c=1;
+
+insert into p partition (c) values (2,3,4);
+
+insert into p partition (c) values (4,5,6);
+
+desc formatted p partition(c=4);
+
+explain select max(a) from p where c=4;
+
+alter table p add partition (c=100);
+
+desc formatted p partition (c=100) a;
+
+explain select max(a) from p where c=100;
+
+desc formatted p partition(c=100);
+
+insert into p partition (c=100) values (1,2);
+
+explain select max(a) from p where c=100;
+
+-- now test mix
+
+drop table p;
+
+CREATE TABLE p(a int, b int) partitioned by (c int);
+
+set hive.stats.column.autogather=false;
+
+insert into p partition (c) values (2,3,4);
+
+explain select max(a) from p where c=4;
+
+analyze table p partition(c=4) compute statistics for columns a;
+
+desc formatted p partition(c=4);
+
+explain select max(a) from p where c=4;
+
+explain select max(b) from p where c=4;
+
+set hive.stats.column.autogather=true;
+
+insert into p partition (c) values (20,30,4);
+
+desc formatted p partition(c=4);
+
+explain select min(a) from p where c=4;
+
+explain select min(b) from p where c=4;
+
+select min(a) from p where c=4;
+
+select min(b) from p where c=4;
+
diff --git a/ql/src/test/results/clientpositive/alterColumnStats.q.out b/ql/src/test/results/clientpositive/alterColumnStats.q.out
new file mode 100644
index 0000000..5a062c5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/alterColumnStats.q.out
@@ -0,0 +1,758 @@
+PREHOOK: query: drop table p
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table p
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p
+POSTHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+insert_num	int
+c1	tinyint
+c2	smallint
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}}
+	numFiles	0
+	numRows	0
+	rawDataSize	0
+	totalSize	0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:
org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into p values (1,22,333) +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (1,22,333) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@p +PREHOOK: Output: default@p +POSTHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@p +POSTHOOK: Output: default@p +PREHOOK: query: explain insert into p values (1,'2','3') +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into p values (1,'2','3') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string), tmp_values_col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: insert_num, 
c1, c2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(insert_num, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: insert_num, c1, c2 + Column Types: int, string, string + Table: default.p + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into p values (1,'2','3') +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (1,'2','3') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 string +c2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here 
#### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +#### A masked pattern was here #### + numFiles 2 + numRows 2 + rawDataSize 13 + totalSize 15 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted p c1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c1 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: desc formatted p c2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c2 string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +PREHOOK: query: drop table p +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@p +PREHOOK: Output: default@p +POSTHOOK: query: drop table p +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@p +POSTHOOK: Output: default@p +PREHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@p +POSTHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@p +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain insert into p values (1,22,333) +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into p values (1,22,333) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__4 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator 
+ expressions: UDFToInteger(tmp_values_col1) (type: int), UDFToByte(tmp_values_col2) (type: tinyint), UDFToShort(tmp_values_col3) (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + Select Operator + expressions: _col0 (type: int), _col1 (type: tinyint), _col2 (type: smallint) + outputColumnNames: insert_num, c1, c2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(insert_num, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: insert_num, c1, c2 + Column Types: int, tinyint, smallint + Table: default.p + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into p values (1,22,333) +PREHOOK: type: QUERY +PREHOOK: Output: default@p +POSTHOOK: query: insert into p values (1,22,333) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@p +POSTHOOK: Lineage: p.c1 EXPRESSION 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: p.c2 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 tinyint +c2 smallint + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted p c1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p c1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +c1 tinyint 22 22 0 1 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} +PREHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@p +PREHOOK: Output: default@p +POSTHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING, c3 STRING) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@p +POSTHOOK: Output: default@p +PREHOOK: query: desc formatted p +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@p +POSTHOOK: query: desc formatted p +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@p +# col_name data_type comment + +insert_num int +c1 string +c2 string +c3 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} +#### A masked pattern was here #### + numFiles 1 + numRows 1 + rawDataSize 8 + totalSize 9 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain insert into p values (1,'2','3','4') +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into p values (1,'2','3','4') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 
depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: values__tmp__table__6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string), tmp_values_col3 (type: string), tmp_values_col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: insert_num, c1, c2, c3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(insert_num, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: insert_num, c1, c2, c3 + Column Types: int, string, string, string + Table: default.p + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.p + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.p
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert into p values (1,'2','3','4')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p values (1,'2','3','4')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p
+POSTHOOK: Lineage: p.c1 SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: p.c2 SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: p.c3 SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+insert_num	int
+c1	string
+c2	string
+c3	string
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+#### A masked pattern was here ####
+	numFiles	2
+	numRows	2
+	rawDataSize	15
+	totalSize	17
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted p insert_num
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p insert_num
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+insert_num	int	1	1	0	1					from deserializer
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+PREHOOK: query: desc formatted p c2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p c2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+c2	string									from deserializer
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+PREHOOK: query: desc formatted p c3
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p c3
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+c3	string									from deserializer
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
diff --git a/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
new file mode 100644
index 0000000..a659a8b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/alterColumnStatsPart.q.out
@@ -0,0 +1,1023 @@
+PREHOOK: query: drop table p
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table p
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE p(a int, b int) partitioned by (c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p
+POSTHOOK: query: CREATE TABLE p(a int, b int) partitioned by (c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles	0
+	numPartitions	0
+	numRows	0
+	rawDataSize	0
+	totalSize	0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: insert into p partition (c=1) values (1,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p@c=1
+POSTHOOK: query: insert into p partition (c=1) values (1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=1
+POSTHOOK: Lineage: p PARTITION(c=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=1).b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted p partition (c=1) a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition (c=1) a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int	from deserializer
+PREHOOK: query: explain select max(a) from p where c=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: analyze table p partition(c=1) compute statistics for columns a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@p
+PREHOOK: Input: default@p@c=1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table p partition(c=1) compute statistics for columns a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@p
+POSTHOOK: Input: default@p@c=1
+#### A masked pattern was here ####
+PREHOOK: query: explain select max(a) from p where c=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: insert into p partition (c) values (2,3,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p partition (c) values (2,3,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=4
+POSTHOOK: Lineage: p PARTITION(c=4).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=4).b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into p partition (c) values (4,5,6)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p partition (c) values (4,5,6)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=6
+POSTHOOK: Lineage: p PARTITION(c=6).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=6).b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted p partition(c=4)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition(c=4)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Partition Information
+Partition Value:	[4]
+Database:	default
+Table:	p
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles	1
+	numRows	1
+	rawDataSize	3
+	totalSize	4
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain select max(a) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: alter table p add partition (c=100)
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@p
+POSTHOOK: query: alter table p add partition (c=100)
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@p
+POSTHOOK: Output: default@p@c=100
+PREHOOK: query: explain select max(a) from p where c=100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: max(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: analyze table p partition(c=100) compute statistics for columns a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@p
+PREHOOK: Input: default@p@c=100
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table p partition(c=100) compute statistics for columns a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@p
+POSTHOOK: Input: default@p@c=100
+#### A masked pattern was here ####
+PREHOOK: query: explain select max(a) from p where c=100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: max(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: desc formatted p partition(c=100)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition(c=100)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Partition Information
+Partition Value:	[100]
+Database:	default
+Table:	p
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}}
+	numFiles	0
+	numRows	0
+	rawDataSize	0
+	totalSize	0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: insert into p partition (c=100) values (1,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p@c=100
+POSTHOOK: query: insert into p partition (c=100) values (1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=100
+POSTHOOK: Lineage: p PARTITION(c=100).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=100).b EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: analyze table p partition(c=100) compute statistics for columns a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@p
+PREHOOK: Input: default@p@c=100
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table p partition(c=100) compute statistics for columns a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@p
+POSTHOOK: Input: default@p@c=100
+#### A masked pattern was here ####
+PREHOOK: query: explain select max(a) from p where c=100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table p
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@p
+PREHOOK: Output: default@p
+POSTHOOK: query: drop table p
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@p
+POSTHOOK: Output: default@p
+PREHOOK: query: CREATE TABLE p(a int, b int) partitioned by (c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p
+POSTHOOK: query: CREATE TABLE p(a int, b int) partitioned by (c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	numFiles	0
+	numPartitions	0
+	numRows	0
+	rawDataSize	0
+	totalSize	0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: insert into p partition (c=1) values (1,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p@c=1
+POSTHOOK: query: insert into p partition (c=1) values (1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=1
+POSTHOOK: Lineage: p PARTITION(c=1).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=1).b EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted p partition (c=1) a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition (c=1) a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+a	int	1	1	0	1					from deserializer
+PREHOOK: query: explain select max(a) from p where c=1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: insert into p partition (c) values (2,3,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p partition (c) values (2,3,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=4
+POSTHOOK: Lineage: p PARTITION(c=4).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=4).b EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into p partition (c) values (4,5,6)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p partition (c) values (4,5,6)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=6
+POSTHOOK: Lineage: p PARTITION(c=6).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=6).b EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted p partition(c=4)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition(c=4)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Partition Information
+Partition Value:	[4]
+Database:	default
+Table:	p
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}}
+	numFiles	1
+	numRows	1
+	rawDataSize	3
+	totalSize	4
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain select max(a) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: alter table p add partition (c=100)
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@p
+POSTHOOK: query: alter table p add partition (c=100)
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@p
+POSTHOOK: Output: default@p@c=100
+PREHOOK: query: desc formatted p partition (c=100) a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition (c=100) a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int	from deserializer
+PREHOOK: query: explain select max(a) from p where c=100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: max(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: desc formatted p partition(c=100)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition(c=100)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Partition Information
+Partition Value:	[100]
+Database:	default
+Table:	p
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}}
+	numFiles	0
+	numRows	0
+	rawDataSize	0
+	totalSize	0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: insert into p partition (c=100) values (1,2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p@c=100
+POSTHOOK: query: insert into p partition (c=100) values (1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=100
+POSTHOOK: Lineage: p PARTITION(c=100).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=100).b EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain select max(a) from p where c=100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table p
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@p
+PREHOOK: Output: default@p
+POSTHOOK: query: drop table p
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@p
+POSTHOOK: Output: default@p
+PREHOOK: query: CREATE TABLE p(a int, b int) partitioned by (c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p
+POSTHOOK: query: CREATE TABLE p(a int, b int) partitioned by (c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p
+PREHOOK: query: insert into p partition (c) values (2,3,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p partition (c) values (2,3,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=4
+POSTHOOK: Lineage: p PARTITION(c=4).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=4).b EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain select max(a) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: analyze table p partition(c=4) compute statistics for columns a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@p
+PREHOOK: Input: default@p@c=4
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table p partition(c=4) compute statistics for columns a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@p
+POSTHOOK: Input: default@p@c=4
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted p partition(c=4)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition(c=4)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Partition Information
+Partition Value:	[4]
+Database:	default
+Table:	p
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+	numFiles	1
+	numRows	1
+	rawDataSize	3
+	totalSize	4
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain select max(a) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(a) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select max(b) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(b) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: b (type: int)
+              outputColumnNames: b
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(b)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: insert into p partition (c) values (20,30,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p partition (c) values (20,30,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p@c=4
+POSTHOOK: Lineage: p PARTITION(c=4).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: p PARTITION(c=4).b EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: desc formatted p partition(c=4)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p partition(c=4)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name	data_type	comment
+
+a	int
+b	int
+
+# Partition Information
+# col_name	data_type	comment
+
+c	int
+
+# Detailed Partition Information
+Partition Value:	[4]
+Database:	default
+Table:	p
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+	numFiles	2
+	numRows	2
+	rawDataSize	8
+	totalSize	10
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: explain select min(a) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select min(a) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select min(b) from p where c=4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select min(b) from p where c=4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: p
+            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: b (type: int)
+              outputColumnNames: b
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(b)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select min(a) from p where c=4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@p
+#### A masked pattern was here ####
+POSTHOOK: query: select min(a) from p where c=4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@p
+#### A masked pattern was here ####
+2
+PREHOOK: query: select min(b) from p where c=4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@p
+PREHOOK: Input: default@p@c=4
+#### A masked pattern was here ####
+POSTHOOK: query: select min(b) from p where c=4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@p
+POSTHOOK: Input: default@p@c=4
+#### A masked pattern was here ####
+3
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out
index b916eba..9561436 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_3.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_3.q.out
@@ -36,7 +36,7 @@ Retention:	0
 #### A masked pattern was here ####
 Table Type:	MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles	0
 	numRows	0
 	rawDataSize	0
@@ -367,7 +367,7 @@ Database:	default
 Table:	nzhang_part14
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"value\":\"true\"}}
 	numFiles	2
 	numRows	4
 	rawDataSize	12
@@ -407,7 +407,7 @@ Database:	default
 Table:	nzhang_part14
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	numFiles	2
 	numRows	4
 	rawDataSize	16
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
index 3ae2f20..dd8ea1a 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_4.q.out
@@ -24,7 +24,7 @@ Retention:	0
 #### A masked pattern was here ####
 Table Type:	MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}}
 	numFiles	0
 	numRows	0
 	rawDataSize	0
@@ -197,7 +197,6 @@ Retention:	0
 #### A masked pattern was here ####
 Table Type:	MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}}
 	numFiles	2
 	numRows	0
 	rawDataSize	0
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
index e19fb5f..f5daaba 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_5.q.out
@@ -603,7 +603,7 @@ Database:	default
 Table:	partitioned1
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"}}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}}
 	numFiles	2
 	numRows	6
 	rawDataSize	78
@@ -637,6 +637,6 @@ POSTHOOK: query: desc formatted partitioned1 partition(part=1) c
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@partitioned1
 col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
-# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+# col_name	data_type	comment
-c	int	100	200	0	3	from deserializer
+c	int	from deserializer