diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index a5f40b1..2627ff0 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -64,38 +64,54 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl if (statsObj.getStatsData().isSetBooleanStats()) { BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats(); - mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(), - boolStats.getNumNulls()); + mColStats.setBooleanStats( + boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, + boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, + boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null); } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); - mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(), + mColStats.setLongStats( + longStats.isSetNumNulls() ? longStats.getNumNulls() : null, + longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); - mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), + mColStats.setDoubleStats( + doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, + doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; - mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); + mColStats.setDecimalStats( + decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, + decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); - mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(), - stringStats.getMaxColLen(), stringStats.getAvgColLen()); + mColStats.setStringStats( + stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, + stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, + stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats(); - mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(), - binaryStats.getAvgColLen()); + mColStats.setBinaryStats( + binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, + binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, + binaryStats.isSetAvgColLen() ? 
binaryStats.getAvgColLen() : null); } return mColStats; } public static void setFieldsIntoOldStats( MTableColumnStatistics mStatsObj, MTableColumnStatistics oldStatsObj) { - oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + if (mStatsObj.getAvgColLen() != null) { + oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + } if (mStatsObj.getLongHighValue() != null) { oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); } @@ -114,29 +130,63 @@ public static void setFieldsIntoOldStats( if (mStatsObj.getDecimalHighValue() != null) { oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); } - oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); - oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); - oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); - oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); - oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + if (mStatsObj.getMaxColLen() != null) { + oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); + } + if (mStatsObj.getNumDVs() != null) { + oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); + } + if (mStatsObj.getNumFalses() != null) { + oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); + } + if (mStatsObj.getNumTrues() != null) { + oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); + } + if (mStatsObj.getNumNulls() != null) { + oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + } oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); } public static void setFieldsIntoOldStats( MPartitionColumnStatistics mStatsObj, MPartitionColumnStatistics oldStatsObj) { - oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); - oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); + if (mStatsObj.getAvgColLen() != null) { + oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + } + if (mStatsObj.getLongHighValue() != null) { + oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); + } + if (mStatsObj.getDoubleHighValue() != null) { + oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); + } oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); - oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); - oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); - oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); - oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); - oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); - oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); - oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); - oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); - oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + if (mStatsObj.getLongLowValue() != null) { + oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); + } + if (mStatsObj.getDoubleLowValue() != null) { + oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); + } + if (mStatsObj.getDecimalLowValue() != null) { + oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); + } + if (mStatsObj.getDecimalHighValue() != null) { + oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); + } + if (mStatsObj.getMaxColLen() != null) { + oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); + } + if (mStatsObj.getNumDVs() != null) { + oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); + } + if (mStatsObj.getNumFalses() != null) { + oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); + } + if (mStatsObj.getNumTrues() != null) { + oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); + } + if (mStatsObj.getNumNulls() != null) { + 
oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + } } public static ColumnStatisticsObj getTableColumnStatisticsObj( @@ -241,31 +291,45 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( if (statsObj.getStatsData().isSetBooleanStats()) { BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats(); - mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(), - boolStats.getNumNulls()); + mColStats.setBooleanStats( + boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, + boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, + boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null); } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); - mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(), + mColStats.setLongStats( + longStats.isSetNumNulls() ? longStats.getNumNulls() : null, + longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); - mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), + mColStats.setDoubleStats( + doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, + doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; - mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); + mColStats.setDecimalStats( + decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, + decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); - mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(), - stringStats.getMaxColLen(), stringStats.getAvgColLen()); + mColStats.setStringStats( + stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, + stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, + stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats(); - mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(), - binaryStats.getAvgColLen()); + mColStats.setBinaryStats( + binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, + binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, + binaryStats.isSetAvgColLen() ? 
binaryStats.getAvgColLen() : null); } return mColStats; } diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 89c31dc..1666dc3 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -46,12 +46,12 @@ private Double doubleHighValue; private String decimalLowValue; private String decimalHighValue; - private long numNulls; - private long numDVs; - private double avgColLen; - private long maxColLen; - private long numTrues; - private long numFalses; + private Long numNulls; + private Long numDVs; + private Double avgColLen; + private Long maxColLen; + private Long numTrues; + private Long numFalses; private long lastAnalyzed; public MPartitionColumnStatistics() {} @@ -72,7 +72,7 @@ public void setColName(String colName) { this.colName = colName; } - public long getNumNulls() { + public Long getNumNulls() { return numNulls; } @@ -80,7 +80,7 @@ public void setNumNulls(long numNulls) { this.numNulls = numNulls; } - public long getNumDVs() { + public Long getNumDVs() { return numDVs; } @@ -88,7 +88,7 @@ public void setNumDVs(long numDVs) { this.numDVs = numDVs; } - public double getAvgColLen() { + public Double getAvgColLen() { return avgColLen; } @@ -96,7 +96,7 @@ public void setAvgColLen(double avgColLen) { this.avgColLen = avgColLen; } - public long getMaxColLen() { + public Long getMaxColLen() { return maxColLen; } @@ -104,7 +104,7 @@ public void setMaxColLen(long maxColLen) { this.maxColLen = maxColLen; } - public long getNumTrues() { + public Long getNumTrues() { return numTrues; } @@ -112,7 +112,7 @@ public void setNumTrues(long numTrues) { this.numTrues = numTrues; } - public long getNumFalses() { + public Long getNumFalses() { return numFalses; } @@ -160,20 +160,20 @@ public void setColType(String colType) { this.colType = colType; } - public void setBooleanStats(long numTrues, long numFalses, long numNulls) { + public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numTrues = numTrues; this.numFalses = numFalses; this.numNulls = numNulls; } - public void setLongStats(long numNulls, long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.doubleLowValue = lowValue; @@ -181,21 +181,21 @@ public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double } public void setDecimalStats( - long numNulls, long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } - 
public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) { + public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.maxColLen = maxColLen; this.avgColLen = avgColLen; diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 44bbab5..bce9f0f 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -44,12 +44,12 @@ private Double doubleHighValue; private String decimalLowValue; private String decimalHighValue; - private long numNulls; - private long numDVs; - private double avgColLen; - private long maxColLen; - private long numTrues; - private long numFalses; + private Long numNulls; + private Long numDVs; + private Double avgColLen; + private Long maxColLen; + private Long numTrues; + private Long numFalses; private long lastAnalyzed; public MTableColumnStatistics() {} @@ -86,7 +86,7 @@ public void setColType(String colType) { this.colType = colType; } - public long getNumNulls() { + public Long getNumNulls() { return numNulls; } @@ -94,7 +94,7 @@ public void setNumNulls(long numNulls) { this.numNulls = numNulls; } - public long getNumDVs() { + public Long getNumDVs() { return numDVs; } @@ -102,7 +102,7 @@ public void setNumDVs(long numDVs) { this.numDVs = numDVs; } - public double getAvgColLen() { + public Double getAvgColLen() { return avgColLen; } @@ -110,7 +110,7 @@ public void setAvgColLen(double avgColLen) { this.avgColLen = avgColLen; } - public long getMaxColLen() { + public Long getMaxColLen() { return maxColLen; } @@ -118,7 +118,7 @@ public void setMaxColLen(long maxColLen) { this.maxColLen = maxColLen; } - public long getNumTrues() { + public Long getNumTrues() { return numTrues; } @@ -126,7 +126,7 @@ public void setNumTrues(long numTrues) { this.numTrues = numTrues; } - public long getNumFalses() { + public Long getNumFalses() { return numFalses; } @@ -150,20 +150,20 @@ public void setDbName(String dbName) { this.dbName = dbName; } - public void setBooleanStats(long numTrues, long numFalses, long numNulls) { + public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numTrues = numTrues; this.numFalses = numFalses; this.numNulls = numNulls; } - public void setLongStats(long numNulls, long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.doubleLowValue = lowValue; @@ -171,21 +171,21 @@ public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double } public void setDecimalStats( - long numNulls, long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { + public void setStringStats(Long numNulls, 
Long numNDVs, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } - public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) { + public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.maxColLen = maxColLen; this.avgColLen = avgColLen; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java new file mode 100644 index 0000000..28c17b7 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java @@ -0,0 +1,305 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; +import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.session.SessionState; + +/** + * ColumnStatsUpdateTask implementation. 
For example, ALTER TABLE src_stat + * UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111'); + * Similarly, ALTER TABLE src_stat_part PARTITION(partitionId=100) + * UPDATE STATISTICS for column value SET + * ('maxColLen'='4444','avgColLen'='44.4'); + **/ + +public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> { + private static final long serialVersionUID = 1L; + private static transient final Log LOG = LogFactory + .getLog(ColumnStatsUpdateTask.class); + + @Override + public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { + super.initialize(conf, queryPlan, ctx); + } + + private ColumnStatistics constructColumnStatsFromInput() + throws SemanticException, MetaException { + + String dbName = SessionState.get().getCurrentDatabase(); + ColumnStatsDesc desc = work.getColStats(); + String tableName = desc.getTableName(); + String partName = work.getPartName(); + List<String> colName = desc.getColName(); + List<String> colType = desc.getColType(); + + ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); + + // grammar prohibits more than 1 column so we are guaranteed to have only 1 + // element in these lists. + + statsObj.setColName(colName.get(0)); + + statsObj.setColType(colType.get(0)); + + ColumnStatisticsData statsData = new ColumnStatisticsData(); + + String columnType = colType.get(0); + + if (columnType.equalsIgnoreCase("long")) { + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setNumNullsIsSet(false); + longStats.setNumDVsIsSet(false); + longStats.setLowValueIsSet(false); + longStats.setHighValueIsSet(false); + Map<String, String> mapProp = work.getMapProp(); + for (Entry<String, String> entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + longStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + longStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + longStats.setLowValue(Long.parseLong(value)); + } else if (fName.equals("highValue")) { + longStats.setHighValue(Long.parseLong(value)); + } else { + throw new SemanticException("Unknown stat"); + } + + } + statsData.setLongStats(longStats); + statsObj.setStatsData(statsData); + } else if (columnType.equalsIgnoreCase("double")) { + DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + doubleStats.setNumNullsIsSet(false); + doubleStats.setNumDVsIsSet(false); + doubleStats.setLowValueIsSet(false); + doubleStats.setHighValueIsSet(false); + Map<String, String> mapProp = work.getMapProp(); + for (Entry<String, String> entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + doubleStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + doubleStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + doubleStats.setLowValue(Double.parseDouble(value)); + } else if (fName.equals("highValue")) { + doubleStats.setHighValue(Double.parseDouble(value)); + } else { + throw new SemanticException("Unknown stat"); + } + } + statsData.setDoubleStats(doubleStats); + statsObj.setStatsData(statsData); + } else if (columnType.equalsIgnoreCase("string")) { + StringColumnStatsData stringStats = new StringColumnStatsData(); + stringStats.setMaxColLenIsSet(false); + stringStats.setAvgColLenIsSet(false); + stringStats.setNumNullsIsSet(false); + stringStats.setNumDVsIsSet(false); + Map<String, String> mapProp = work.getMapProp(); + for (Entry<String, String> entry : mapProp.entrySet()) { + String fName = entry.getKey(); + 
String value = entry.getValue(); + if (fName.equals("numNulls")) { + stringStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + stringStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("avgColLen")) { + stringStats.setAvgColLen(Double.parseDouble(value)); + } else if (fName.equals("maxColLen")) { + stringStats.setMaxColLen(Long.parseLong(value)); + } else { + throw new SemanticException("Unknown stat"); + } + } + statsData.setStringStats(stringStats); + statsObj.setStatsData(statsData); + } else if (columnType.equalsIgnoreCase("boolean")) { + BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); + booleanStats.setNumNullsIsSet(false); + booleanStats.setNumTruesIsSet(false); + booleanStats.setNumFalsesIsSet(false); + Map<String, String> mapProp = work.getMapProp(); + for (Entry<String, String> entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + booleanStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numTrues")) { + booleanStats.setNumTrues(Long.parseLong(value)); + } else if (fName.equals("numFalses")) { + booleanStats.setNumFalses(Long.parseLong(value)); + } else { + throw new SemanticException("Unknown stat"); + } + } + statsData.setBooleanStats(booleanStats); + statsObj.setStatsData(statsData); + } else if (columnType.equalsIgnoreCase("binary")) { + BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); + binaryStats.setNumNullsIsSet(false); + binaryStats.setAvgColLenIsSet(false); + binaryStats.setMaxColLenIsSet(false); + Map<String, String> mapProp = work.getMapProp(); + for (Entry<String, String> entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + binaryStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("avgColLen")) { + binaryStats.setAvgColLen(Double.parseDouble(value)); + } else if (fName.equals("maxColLen")) { + binaryStats.setMaxColLen(Long.parseLong(value)); + } else { + throw new SemanticException("Unknown stat"); + } + } + statsData.setBinaryStats(binaryStats); + statsObj.setStatsData(statsData); + } else if (columnType.equalsIgnoreCase("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.setNumNullsIsSet(false); + decimalStats.setNumDVsIsSet(false); + decimalStats.setLowValueIsSet(false); + decimalStats.setHighValueIsSet(false); + Map<String, String> mapProp = work.getMapProp(); + for (Entry<String, String> entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + decimalStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + decimalStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + BigDecimal d = new BigDecimal(value); + decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d + .unscaledValue().toByteArray()), (short) d.scale())); + } else if (fName.equals("highValue")) { + BigDecimal d = new BigDecimal(value); + decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d + .unscaledValue().toByteArray()), (short) d.scale())); + } else { + throw new SemanticException("Unknown stat"); + } + } + statsData.setDecimalStats(decimalStats); + statsObj.setStatsData(statsData); + } else { + throw new SemanticException("Unsupported type"); + } + + ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, + partName, partName == null); + ColumnStatistics colStat = new ColumnStatistics(); + colStat.setStatsDesc(statsDesc); + 
colStat.addToStatsObj(statsObj); + return colStat; + } + + private ColumnStatisticsDesc getColumnStatsDesc(String dbName, + String tableName, String partName, boolean isTblLevel) { + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); + statsDesc.setDbName(dbName); + statsDesc.setTableName(tableName); + statsDesc.setIsTblLevel(isTblLevel); + if (!isTblLevel) { + statsDesc.setPartName(partName); + } else { + statsDesc.setPartName(null); + } + return statsDesc; + } + + private int persistTableStats() throws HiveException, MetaException, + IOException { + // Construct a column statistics object from user input + ColumnStatistics colStats = constructColumnStatsFromInput(); + // Persist the column statistics object to the metastore + db.updateTableColumnStatistics(colStats); + return 0; + } + + private int persistPartitionStats() throws HiveException, MetaException, + IOException { + // Construct a column statistics object from user input + ColumnStatistics colStats = constructColumnStatsFromInput(); + // Persist the column statistics object to the metastore + db.updatePartitionColumnStatistics(colStats); + return 0; + } + + @Override + public int execute(DriverContext driverContext) { + try { + if (work.getColStats().isTblLevel()) { + return persistTableStats(); + } else { + return persistPartitionStats(); + } + } catch (Exception e) { + LOG.info(e); + } + return 1; + } + + @Override + public StageType getType() { + return StageType.COLUMNSTATS; + } + + @Override + public String getName() { + return "COLUMNSTATS UPDATE TASK"; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index 24dfed1..3d74459 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.io.merge.MergeWork; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.CopyWork; @@ -92,6 +93,7 @@ public TaskTuple(Class<T> workClass, Class<? extends Task<T>> taskClass) { StatsTask.class)); taskvec.add(new TaskTuple<StatsNoJobWork>(StatsNoJobWork.class, StatsNoJobTask.class)); taskvec.add(new TaskTuple<ColumnStatsWork>(ColumnStatsWork.class, ColumnStatsTask.class)); + taskvec.add(new TaskTuple<ColumnStatsUpdateWork>(ColumnStatsUpdateWork.class, ColumnStatsUpdateTask.class)); taskvec.add(new TaskTuple<MergeWork>(MergeWork.class, MergeTask.class)); taskvec.add(new TaskTuple<DependencyCollectionWork>(DependencyCollectionWork.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 0e705aa..1189610 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -25,6 +25,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -57,6 +58,7 @@ import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ArchiveUtils; +import org.apache.hadoop.hive.ql.exec.ColumnStatsUpdateTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import 
org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Task; @@ -91,6 +93,8 @@ import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition; import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -276,6 +280,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { analyzeAlterTableClusterSort(ast, tableName, partSpec); } else if (ast.getToken().getType() == HiveParser.TOK_COMPACT) { analyzeAlterTableCompact(ast, tableName, partSpec); + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS) { + analyzeAlterTableUpdateStats(ast, tblPart); } break; } @@ -378,6 +384,9 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { case HiveParser.TOK_ALTERTABLE_RENAME: analyzeAlterTableRename(ast, false); break; + case HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS: + analyzeAlterTableUpdateStats(ast, null); + break; case HiveParser.TOK_ALTERTABLE_TOUCH: analyzeAlterTableTouch(ast); break; @@ -507,6 +516,57 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } + private void analyzeAlterTableUpdateStats(ASTNode ast, TablePartition tblPart) + throws SemanticException { + String tblName = null; + String colName = null; + Map<String, String> mapProp = null; + Map<String, String> partSpec = null; + String partName = null; + if (tblPart == null) { + tblName = getUnescapedName((ASTNode) ast.getChild(0)); + colName = getUnescapedName((ASTNode) ast.getChild(1)); + mapProp = getProps((ASTNode) (ast.getChild(2)).getChild(0)); + } else { + tblName = tblPart.tableName; + partSpec = tblPart.partSpec; + try { + partName = Warehouse.makePartName(partSpec, false); + } catch (MetaException e) { + throw new SemanticException("partition " + partSpec.toString() + + " not found"); + } + colName = getUnescapedName((ASTNode) ast.getChild(0)); + mapProp = getProps((ASTNode) (ast.getChild(1)).getChild(0)); + } + + Table tbl = null; + try { + tbl = db.getTable(tblName); + } catch (HiveException e) { + throw new SemanticException("table " + tblName + " not found"); + } + + String colType = null; + List<FieldSchema> cols = tbl.getCols(); + for (FieldSchema col : cols) { + if (colName.equalsIgnoreCase(col.getName())) { + colType = col.getType(); + break; + } + } + + if (colType == null) { + throw new SemanticException("column type not found"); + } + + ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tbl.getTableName(), + Arrays.asList(colName), Arrays.asList(colType), partSpec == null); + ColumnStatsUpdateTask cStatsUpdateTask = (ColumnStatsUpdateTask) TaskFactory + .get(new ColumnStatsUpdateWork(cStatsDesc, partName, mapProp), conf); + rootTasks.add(cStatsUpdateTask); + } + private void analyzeSetShowRole(ASTNode ast) throws SemanticException { switch (ast.getChildCount()) { case 0: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index bf77d1c..a76cad7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -146,6 +146,7 @@ TOK_ALTERTABLE_ARCHIVE; TOK_ALTERTABLE_UNARCHIVE; TOK_ALTERTABLE_SERDEPROPERTIES; TOK_ALTERTABLE_SERIALIZER; 
+TOK_ALTERTABLE_UPDATECOLSTATS; TOK_TABLE_PARTITION; TOK_ALTERTABLE_FILEFORMAT; TOK_ALTERTABLE_LOCATION; @@ -938,6 +939,7 @@ alterTableStatementSuffix : alterStatementSuffixRename | alterStatementSuffixAddCol | alterStatementSuffixRenameCol + | alterStatementSuffixUpdateStatsCol | alterStatementSuffixDropPartitions | alterStatementSuffixAddPartitions | alterStatementSuffixTouch @@ -1028,6 +1030,13 @@ alterStatementSuffixRenameCol ->^(TOK_ALTERTABLE_RENAMECOL tableName $oldName $newName colType $comment? alterStatementChangeColPosition?) ; +alterStatementSuffixUpdateStatsCol +@init { pushMsg("update column statistics", state); } +@after { popMsg(state); } + : identifier KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? + ->^(TOK_ALTERTABLE_UPDATECOLSTATS identifier $colName tableProperties $comment?) + ; + alterStatementChangeColPosition : first=KW_FIRST|KW_AFTER afterCol=identifier ->{$first != null}? ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION ) @@ -1130,6 +1139,7 @@ alterTblPartitionStatementSuffix | alterStatementSuffixMergeFiles | alterStatementSuffixSerdeProperties | alterStatementSuffixRenamePart + | alterStatementSuffixStatsPart | alterStatementSuffixBucketNum | alterTblPartitionStatementSuffixSkewedLocation | alterStatementSuffixClusterbySortby @@ -1221,6 +1231,13 @@ alterStatementSuffixRenamePart ->^(TOK_ALTERTABLE_RENAMEPART partitionSpec) ; +alterStatementSuffixStatsPart +@init { pushMsg("alter table stats partition statement", state); } +@after { popMsg(state); } + : KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? + ->^(TOK_ALTERTABLE_UPDATECOLSTATS $colName tableProperties $comment?) + ; + alterStatementSuffixMergeFiles @init { pushMsg("", state); } @after { popMsg(state); } @@ -1300,6 +1317,7 @@ descStatement | (KW_DESCRIBE|KW_DESC) (KW_DATABASE|KW_SCHEMA) KW_EXTENDED? (dbName=identifier) -> ^(TOK_DESCDATABASE $dbName KW_EXTENDED?) 
; + analyzeStatement @init { pushMsg("analyze statement", state); } @after { popMsg(state); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 268920a..3dfce99 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -109,6 +109,7 @@ commandType.put(HiveParser.TOK_ALTERTABLE_PARTCOLTYPE, HiveOperation.ALTERTABLE_PARTCOLTYPE); commandType.put(HiveParser.TOK_SHOW_COMPACTIONS, HiveOperation.SHOW_COMPACTIONS); commandType.put(HiveParser.TOK_SHOW_TRANSACTIONS, HiveOperation.SHOW_TRANSACTIONS); + commandType.put(HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS, HiveOperation.ALTERTABLE_UPDATETABLESTATS); } static { @@ -231,12 +232,14 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) case HiveParser.TOK_TRUNCATETABLE: case HiveParser.TOK_EXCHANGEPARTITION: case HiveParser.TOK_SHOW_SET_ROLE: - + case HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS: return new DDLSemanticAnalyzer(conf); case HiveParser.TOK_ALTERTABLE_PARTITION: HiveOperation commandType = null; Integer type = ((ASTNode) tree.getChild(1)).getToken().getType(); - if (tree.getChild(0).getChildCount() > 1) { + if (type == HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS) { + commandType = HiveOperation.ALTERTABLE_UPDATEPARTSTATS; + } else if (tree.getChild(0).getChildCount() > 1) { commandType = tablePartitionCommandType.get(type)[1]; } else { commandType = tablePartitionCommandType.get(type)[0]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java new file mode 100644 index 0000000..d644155 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +/** + * ColumnStatsUpdateWork implementation. ColumnStatsUpdateWork will persist the + * colStats into metastore. 
Work corresponds to statements like ALTER TABLE + * src_stat UPDATE STATISTICS for column key SET + * ('numDVs'='1111','avgColLen'='1.111'); ALTER TABLE src_stat_part + * PARTITION(partitionId=100) UPDATE STATISTICS for column value SET + * ('maxColLen'='4444','avgColLen'='44.4'); + */ +@Explain(displayName = "Column Stats Update Work") +public class ColumnStatsUpdateWork implements Serializable { + private static final long serialVersionUID = 1L; + private ColumnStatsDesc colStats; + private String partName; + private Map<String, String> mapProp; + + public ColumnStatsUpdateWork(ColumnStatsDesc colStats, String partName, + Map<String, String> mapProp) { + this.partName = partName; + this.colStats = colStats; + this.mapProp = mapProp; + } + + @Override + public String toString() { + return null; + } + + @Explain(displayName = "Column Stats Desc") + public ColumnStatsDesc getColStats() { + return colStats; + } + + public String getPartName() { + return partName; + } + + public Map<String, String> getMapProp() { + return mapProp; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java index 6a447ea..14ce6aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java @@ -38,6 +38,8 @@ ALTERTABLE_REPLACECOLS("ALTERTABLE_REPLACECOLS", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_RENAMECOL("ALTERTABLE_RENAMECOL", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_RENAMEPART("ALTERTABLE_RENAMEPART", new Privilege[]{Privilege.DROP}, new Privilege[]{Privilege.CREATE}), + ALTERTABLE_UPDATEPARTSTATS("ALTERTABLE_UPDATEPARTSTATS", new Privilege[]{Privilege.ALTER_METADATA}, null), + ALTERTABLE_UPDATETABLESTATS("ALTERTABLE_UPDATETABLESTATS", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_RENAME("ALTERTABLE_RENAME", new Privilege[]{Privilege.ALTER_METADATA}, null), ALTERTABLE_DROPPARTS("ALTERTABLE_DROPPARTS", new Privilege[]{Privilege.DROP}, null), // The location is input and table is output for alter-table add partitions diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java index 81366e3..29ae4a0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java @@ -59,6 +59,8 @@ ANALYZE_TABLE, ALTERTABLE_BUCKETNUM, ALTERPARTITION_BUCKETNUM, + ALTERTABLE_UPDATETABLESTATS, + ALTERTABLE_UPDATEPARTSTATS, SHOWDATABASES, SHOWTABLES, SHOWCOLUMNS, diff --git a/ql/src/test/queries/clientpositive/alter_partition_update_status.q b/ql/src/test/queries/clientpositive/alter_partition_update_status.q new file mode 100644 index 0000000..1eee9a5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/alter_partition_update_status.q @@ -0,0 +1,25 @@ +create table src_stat_part_one(key string, value string) partitioned by (partitionId int); + +insert overwrite table src_stat_part_one partition (partitionId=1) + select * from src1; + +ANALYZE TABLE src_stat_part_one PARTITION(partitionId=1) COMPUTE STATISTICS for columns; + +describe formatted src_stat_part_one.key PARTITION(partitionId=1); + +ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2'); + +describe formatted src_stat_part_one.key PARTITION(partitionId=1); + 
+create table src_stat_part_two(key string, value string) partitioned by (px int, py string); + +insert overwrite table src_stat_part_two partition (px=1, py='a') + select * from src1; + +ANALYZE TABLE src_stat_part_two PARTITION(px=1) COMPUTE STATISTICS for columns; + +describe formatted src_stat_part_two.key PARTITION(px=1, py='a'); + +ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40'); + +describe formatted src_stat_part_two.key PARTITION(px=1, py='a'); \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/alter_table_update_status.q b/ql/src/test/queries/clientpositive/alter_table_update_status.q new file mode 100644 index 0000000..fd45cd4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/alter_table_update_status.q @@ -0,0 +1,30 @@ +create table src_stat as select * from src1; + +create table src_stat_int ( + key double, + value string +); + +LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int; + +set hive.stats.dbclass=jdbc:derby; + +ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key; + +describe formatted src_stat.key; + +ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111'); + +describe formatted src_stat.key; + +ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124'); + +describe formatted src_stat.value; + +ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key; + +describe formatted src_stat_int.key; + +ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22'); + +describe formatted src_stat_int.key; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out new file mode 100644 index 0000000..165caf3 --- /dev/null +++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out @@ -0,0 +1,102 @@ +PREHOOK: query: create table src_stat_part_one(key string, value string) partitioned by (partitionId int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table src_stat_part_one(key string, value string) partitioned by (partitionId int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_part_one +PREHOOK: query: insert overwrite table src_stat_part_one partition (partitionId=1) + select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part_one@partitionid=1 +POSTHOOK: query: insert overwrite table src_stat_part_one partition (partitionId=1) + select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part_one@partitionid=1 +POSTHOOK: Lineage: src_stat_part_one PARTITION(partitionid=1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part_one PARTITION(partitionid=1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ANALYZE TABLE src_stat_part_one PARTITION(partitionId=1) COMPUTE STATISTICS for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat_part_one +PREHOOK: Input: default@src_stat_part_one@partitionid=1 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat_part_one PARTITION(partitionId=1) COMPUTE STATISTICS 
for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat_part_one +POSTHOOK: Input: default@src_stat_part_one@partitionid=1 +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_stat_part_one.key PARTITION(partitionId=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part_one +POSTHOOK: query: describe formatted src_stat_part_one.key PARTITION(partitionId=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part_one +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 14 1.72 3 from deserializer +PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') +PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS +POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2') +POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS +PREHOOK: query: describe formatted src_stat_part_one.key PARTITION(partitionId=1) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part_one +POSTHOOK: query: describe formatted src_stat_part_one.key PARTITION(partitionId=1) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part_one +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 11 2.2 3 from deserializer +PREHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table src_stat_part_two(key string, value string) partitioned by (px int, py string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_part_two +PREHOOK: query: insert overwrite table src_stat_part_two partition (px=1, py='a') + select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_part_two@px=1/py=a +POSTHOOK: query: insert overwrite table src_stat_part_two partition (px=1, py='a') + select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_part_two@px=1/py=a +POSTHOOK: Lineage: src_stat_part_two PARTITION(px=1,py=a).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_part_two PARTITION(px=1,py=a).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ANALYZE TABLE src_stat_part_two PARTITION(px=1) COMPUTE STATISTICS for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat_part_two +PREHOOK: Input: default@src_stat_part_two@px=1/py=a +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat_part_two PARTITION(px=1) COMPUTE STATISTICS for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat_part_two +POSTHOOK: Input: default@src_stat_part_two@px=1/py=a +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_stat_part_two.key PARTITION(px=1, py='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part_two +POSTHOOK: query: describe formatted src_stat_part_two.key PARTITION(px=1, py='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part_two +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 14 1.72 3 from deserializer +PREHOOK: query: ALTER TABLE 
src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') +PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS +POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40') +POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS +PREHOOK: query: describe formatted src_stat_part_two.key PARTITION(px=1, py='a') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_part_two +POSTHOOK: query: describe formatted src_stat_part_two.key PARTITION(px=1, py='a') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_part_two +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 30 1.72 40 from deserializer diff --git a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out new file mode 100644 index 0000000..3e294ba --- /dev/null +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -0,0 +1,101 @@ +PREHOOK: query: create table src_stat as select * from src1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src1 +POSTHOOK: query: create table src_stat as select * from src1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat +PREHOOK: query: create table src_stat_int ( + key double, + value string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table src_stat_int ( + key double, + value string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_int +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@src_stat_int +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv3.txt' INTO TABLE src_stat_int +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@src_stat_int +PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_stat.key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat.key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 14 1.72 3 from deserializer +PREHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column key SET ('numDVs'='1111','avgColLen'='1.111') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat.key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat.key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key string 0 1111 1.111 3 from deserializer +PREHOOK: query: ALTER TABLE src_stat UPDATE 
STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat UPDATE STATISTICS for column value SET ('numDVs'='121','numNulls'='122','avgColLen'='1.23','maxColLen'='124') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat.value +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat +POSTHOOK: query: describe formatted src_stat.value +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 122 121 1.23 124 from deserializer +PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat_int +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat_int +#### A masked pattern was here #### +PREHOOK: query: describe formatted src_stat_int.key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_int +POSTHOOK: query: describe formatted src_stat_int.key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_int +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key double 66.0 406.0 10 14 from deserializer +PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: query: describe formatted src_stat_int.key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_stat_int +POSTHOOK: query: describe formatted src_stat_int.key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_stat_int +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key double 333.22 22.22 10 2222 from deserializer
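The change set above hinges on one pattern used in three places: ColumnStatsUpdateTask clears the Thrift isSet flags and then sets only the stats named in the SET clause; StatObjectConverter maps each still-unset Thrift field to a null in the (now boxed) model fields; and setFieldsIntoOldStats skips null fields so previously stored values survive. A minimal standalone sketch of that flow, assuming only the Thrift-generated LongColumnStatsData class already used by the patch (the class name PartialStatsUpdateSketch and the values below are illustrative, not part of the patch):

import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class PartialStatsUpdateSketch {
  public static void main(String[] args) {
    // User ran something like: ALTER TABLE t UPDATE STATISTICS for column c SET ('numDVs'='1111');
    LongColumnStatsData longStats = new LongColumnStatsData();
    longStats.setNumNullsIsSet(false); // clear the flags first, as ColumnStatsUpdateTask does
    longStats.setNumDVsIsSet(false);
    longStats.setNumDVs(1111L);        // only 'numDVs' was supplied; its flag is set again

    // StatObjectConverter pattern: an unset Thrift field becomes a null model field
    Long numDVs = longStats.isSetNumDVs() ? longStats.getNumDVs() : null;       // 1111
    Long numNulls = longStats.isSetNumNulls() ? longStats.getNumNulls() : null; // null

    // setFieldsIntoOldStats pattern: a null field never overwrites the stored value
    Long oldNumDVs = 14L;  // pretend these were computed earlier by ANALYZE TABLE
    Long oldNumNulls = 0L;
    if (numDVs != null) {
      oldNumDVs = numDVs;
    }
    if (numNulls != null) {
      oldNumNulls = numNulls;
    }
    System.out.println("numDVs=" + oldNumDVs + ", numNulls=" + oldNumNulls);
    // prints: numDVs=1111, numNulls=0
  }
}

This is why, in alter_table_update_status.q.out above, updating only numDVs and avgColLen for src_stat.key changes those two columns while num_nulls (0) and max_col_len (3) keep the values computed by ANALYZE TABLE.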