diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index c3e2820..3f15a7c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -64,38 +64,54 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl if (statsObj.getStatsData().isSetBooleanStats()) { BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats(); - mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(), - boolStats.getNumNulls()); + mColStats.setBooleanStats( + boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, + boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, + boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null); } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); - mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(), + mColStats.setLongStats( + longStats.isSetNumNulls() ? longStats.getNumNulls() : null, + longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); - mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), + mColStats.setDoubleStats( + doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, + doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? 
doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; - mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); + mColStats.setDecimalStats( + decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, + decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, + low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); - mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(), - stringStats.getMaxColLen(), stringStats.getAvgColLen()); + mColStats.setStringStats( + stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, + stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, + stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats(); - mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(), - binaryStats.getAvgColLen()); + mColStats.setBinaryStats( + binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, + binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, + binaryStats.isSetAvgColLen() ? 
binaryStats.getAvgColLen() : null); } return mColStats; } public static void setFieldsIntoOldStats( MTableColumnStatistics mStatsObj, MTableColumnStatistics oldStatsObj) { - oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + if (mStatsObj.getAvgColLen() != null) { + oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + } if (mStatsObj.getLongHighValue() != null) { oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); } @@ -114,29 +130,63 @@ public static void setFieldsIntoOldStats( if (mStatsObj.getDecimalHighValue() != null) { oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); } - oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); - oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); - oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); - oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); - oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + if (mStatsObj.getMaxColLen() != null) { + oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); + } + if (mStatsObj.getNumDVs() != null) { + oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); + } + if (mStatsObj.getNumFalses() != null) { + oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); + } + if (mStatsObj.getNumTrues() != null) { + oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); + } + if (mStatsObj.getNumNulls() != null) { + oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + } oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); } public static void setFieldsIntoOldStats( MPartitionColumnStatistics mStatsObj, MPartitionColumnStatistics oldStatsObj) { - oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); - oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); + if (mStatsObj.getAvgColLen() != null) { + oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); + } + if (mStatsObj.getLongHighValue() != null) { + oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); + } + if (mStatsObj.getDoubleHighValue() != null) { + 
oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); + } oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); - oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); - oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); - oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); - oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); - oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); - oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); - oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); - oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); - oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + if (mStatsObj.getLongLowValue() != null) { + oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); + } + if (mStatsObj.getDoubleLowValue() != null) { + oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); + } + if (mStatsObj.getDecimalLowValue() != null) { + oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); + } + if (mStatsObj.getDecimalHighValue() != null) { + oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); + } + if (mStatsObj.getMaxColLen() != null) { + oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); + } + if (mStatsObj.getNumDVs() != null) { + oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); + } + if (mStatsObj.getNumFalses() != null) { + oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); + } + if (mStatsObj.getNumTrues() != null) { + oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); + } + if (mStatsObj.getNumNulls() != null) { + oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + } } public static ColumnStatisticsObj getTableColumnStatisticsObj( @@ -241,31 +291,45 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( if (statsObj.getStatsData().isSetBooleanStats()) { BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats(); - mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(), - boolStats.getNumNulls()); + 
mColStats.setBooleanStats( + boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, + boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, + boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null); } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); - mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(), + mColStats.setLongStats( + longStats.isSetNumNulls() ? longStats.getNumNulls() : null, + longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); - mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), + mColStats.setDoubleStats( + doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, + doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; - mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); + mColStats.setDecimalStats( + decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, + decimalStats.isSetNumDVs() ? 
decimalStats.getNumDVs() : null, + low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); - mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(), - stringStats.getMaxColLen(), stringStats.getAvgColLen()); + mColStats.setStringStats( + stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, + stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, + stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, + stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null); } else if (statsObj.getStatsData().isSetBinaryStats()) { BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats(); - mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(), - binaryStats.getAvgColLen()); + mColStats.setBinaryStats( + binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, + binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, + binaryStats.isSetAvgColLen() ? 
binaryStats.getAvgColLen() : null); } return mColStats; } diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 89c31dc..1666dc3 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -46,12 +46,12 @@ private Double doubleHighValue; private String decimalLowValue; private String decimalHighValue; - private long numNulls; - private long numDVs; - private double avgColLen; - private long maxColLen; - private long numTrues; - private long numFalses; + private Long numNulls; + private Long numDVs; + private Double avgColLen; + private Long maxColLen; + private Long numTrues; + private Long numFalses; private long lastAnalyzed; public MPartitionColumnStatistics() {} @@ -72,7 +72,7 @@ public void setColName(String colName) { this.colName = colName; } - public long getNumNulls() { + public Long getNumNulls() { return numNulls; } @@ -80,7 +80,7 @@ public void setNumNulls(long numNulls) { this.numNulls = numNulls; } - public long getNumDVs() { + public Long getNumDVs() { return numDVs; } @@ -88,7 +88,7 @@ public void setNumDVs(long numDVs) { this.numDVs = numDVs; } - public double getAvgColLen() { + public Double getAvgColLen() { return avgColLen; } @@ -96,7 +96,7 @@ public void setAvgColLen(double avgColLen) { this.avgColLen = avgColLen; } - public long getMaxColLen() { + public Long getMaxColLen() { return maxColLen; } @@ -104,7 +104,7 @@ public void setMaxColLen(long maxColLen) { this.maxColLen = maxColLen; } - public long getNumTrues() { + public Long getNumTrues() { return numTrues; } @@ -112,7 +112,7 @@ public void setNumTrues(long numTrues) { this.numTrues = numTrues; } - public long getNumFalses() { + public Long getNumFalses() { return numFalses; } @@ -160,20 +160,20 @@ 
public void setColType(String colType) { this.colType = colType; } - public void setBooleanStats(long numTrues, long numFalses, long numNulls) { + public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numTrues = numTrues; this.numFalses = numFalses; this.numNulls = numNulls; } - public void setLongStats(long numNulls, long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.doubleLowValue = lowValue; @@ -181,21 +181,21 @@ public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double } public void setDecimalStats( - long numNulls, long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } - public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) { + public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.maxColLen = maxColLen; this.avgColLen = avgColLen; diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 
44bbab5..bce9f0f 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -44,12 +44,12 @@ private Double doubleHighValue; private String decimalLowValue; private String decimalHighValue; - private long numNulls; - private long numDVs; - private double avgColLen; - private long maxColLen; - private long numTrues; - private long numFalses; + private Long numNulls; + private Long numDVs; + private Double avgColLen; + private Long maxColLen; + private Long numTrues; + private Long numFalses; private long lastAnalyzed; public MTableColumnStatistics() {} @@ -86,7 +86,7 @@ public void setColType(String colType) { this.colType = colType; } - public long getNumNulls() { + public Long getNumNulls() { return numNulls; } @@ -94,7 +94,7 @@ public void setNumNulls(long numNulls) { this.numNulls = numNulls; } - public long getNumDVs() { + public Long getNumDVs() { return numDVs; } @@ -102,7 +102,7 @@ public void setNumDVs(long numDVs) { this.numDVs = numDVs; } - public double getAvgColLen() { + public Double getAvgColLen() { return avgColLen; } @@ -110,7 +110,7 @@ public void setAvgColLen(double avgColLen) { this.avgColLen = avgColLen; } - public long getMaxColLen() { + public Long getMaxColLen() { return maxColLen; } @@ -118,7 +118,7 @@ public void setMaxColLen(long maxColLen) { this.maxColLen = maxColLen; } - public long getNumTrues() { + public Long getNumTrues() { return numTrues; } @@ -126,7 +126,7 @@ public void setNumTrues(long numTrues) { this.numTrues = numTrues; } - public long getNumFalses() { + public Long getNumFalses() { return numFalses; } @@ -150,20 +150,20 @@ public void setDbName(String dbName) { this.dbName = dbName; } - public void setBooleanStats(long numTrues, long numFalses, long numNulls) { + public void setBooleanStats(Long numTrues, Long numFalses, Long numNulls) { this.numTrues = numTrues; this.numFalses = 
numFalses; this.numNulls = numNulls; } - public void setLongStats(long numNulls, long numNDVs, Long lowValue, Long highValue) { + public void setLongStats(Long numNulls, Long numNDVs, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double highValue) { + public void setDoubleStats(Long numNulls, Long numNDVs, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.doubleLowValue = lowValue; @@ -171,21 +171,21 @@ public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double } public void setDecimalStats( - long numNulls, long numNDVs, String lowValue, String highValue) { + Long numNulls, Long numNDVs, String lowValue, String highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.decimalLowValue = lowValue; this.decimalHighValue = highValue; } - public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) { + public void setStringStats(Long numNulls, Long numNDVs, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.numDVs = numNDVs; this.maxColLen = maxColLen; this.avgColLen = avgColLen; } - public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) { + public void setBinaryStats(Long numNulls, Long maxColLen, Double avgColLen) { this.numNulls = numNulls; this.maxColLen = maxColLen; this.avgColLen = avgColLen; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java new file mode 100644 index 0000000..17fad61 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java @@ -0,0 +1,304 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.io.IOException; +import java.io.Serializable; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.QueryPlan; +import 
org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; +import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.util.StringUtils; + +/** + * Task that builds column statistics from user-supplied properties and + * persists them to the metastore at table or partition level. + **/ + +public class ColumnStatsUpdateTask extends Task + implements Serializable { + private static final long serialVersionUID = 1L; + private static transient final Log LOG = LogFactory + .getLog(ColumnStatsUpdateTask.class); + + public ColumnStatsUpdateTask() { + super(); + } + + @Override + public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { + super.initialize(conf, queryPlan, ctx); + } + + /** + * Builds a single-column ColumnStatistics object from the work's + * property map, choosing the stats container by the column's type. 
+ * + * @return statistics for the first column named in the work + * @throws IllegalArgumentException if the column type is not supported + */ + private ColumnStatistics constructColumnStatsFromInput() { + + String dbName = SessionState.get().getCurrentDatabase(); + ColumnStatsDesc desc = work.getColStats(); + String tableName = desc.getTableName(); + String partName = work.getPartName(); + List colName = desc.getColName(); + List colType = desc.getColType(); + + ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); + + statsObj.setColName(colName.get(0)); + + statsObj.setColType(colType.get(0)); + + ColumnStatisticsData statsData = new ColumnStatisticsData(); + + String s = colType.get(0); + + // Match Hive's real type names: integral columns are tinyint/smallint/int/bigint (never "long"), + // float shares the double stats, and char/varchar/decimal types carry parameters such as (10,2). + if ("long".equalsIgnoreCase(s) || "tinyint".equalsIgnoreCase(s) + || "smallint".equalsIgnoreCase(s) || "int".equalsIgnoreCase(s) + || "bigint".equalsIgnoreCase(s) || "timestamp".equalsIgnoreCase(s)) { + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.clear(); + HashMap mapProp = work.getMapProp(); + for (Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + longStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + longStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + longStats.setLowValue(Long.parseLong(value)); + } else if (fName.equals("highValue")) { + 
longStats.setHighValue(Long.parseLong(value)); + } + } + statsData.setLongStats(longStats); + statsObj.setStatsData(statsData); + } else if ("double".equalsIgnoreCase(s) || "float".equalsIgnoreCase(s)) { + DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + doubleStats.clear(); + HashMap mapProp = work.getMapProp(); + for (Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + doubleStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + doubleStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + doubleStats.setLowValue(Double.parseDouble(value)); + } else if (fName.equals("highValue")) { + doubleStats.setHighValue(Double.parseDouble(value)); + } + } + statsData.setDoubleStats(doubleStats); + statsObj.setStatsData(statsData); + } else if ("string".equalsIgnoreCase(s) || hasTypePrefix(s, "char") + || hasTypePrefix(s, "varchar")) { + StringColumnStatsData stringStats = new StringColumnStatsData(); + stringStats.setMaxColLenIsSet(false); + stringStats.setAvgColLenIsSet(false); + stringStats.setNumNullsIsSet(false); + stringStats.setNumDVsIsSet(false); + HashMap mapProp = work.getMapProp(); + for (Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + stringStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + stringStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("avgColLen")) { + stringStats.setAvgColLen(Double.parseDouble(value)); + } else if (fName.equals("maxColLen")) { + stringStats.setMaxColLen(Long.parseLong(value)); + } + } + statsData.setStringStats(stringStats); + statsObj.setStatsData(statsData); + } else if ("boolean".equalsIgnoreCase(s)) { + BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); + booleanStats.clear(); + HashMap mapProp = work.getMapProp(); + for 
(Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = 
entry.getValue(); + if (fName.equals("numNulls")) { + booleanStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numTrues")) { + booleanStats.setNumTrues(Long.parseLong(value)); + } else if (fName.equals("numFalses")) { + booleanStats.setNumFalses(Long.parseLong(value)); + } + } + statsData.setBooleanStats(booleanStats); + statsObj.setStatsData(statsData); + } else if ("binary".equalsIgnoreCase(s)) { + BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); + binaryStats.clear(); + HashMap mapProp = work.getMapProp(); + for (Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + binaryStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("avgColLen")) { + binaryStats.setAvgColLen(Double.parseDouble(value)); + } else if (fName.equals("maxColLen")) { + binaryStats.setMaxColLen(Long.parseLong(value)); + } + } + statsData.setBinaryStats(binaryStats); + statsObj.setStatsData(statsData); + } else if (hasTypePrefix(s, "decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.clear(); + HashMap mapProp = work.getMapProp(); + for (Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + decimalStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + decimalStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + BigDecimal d = new BigDecimal(value); + decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d + .unscaledValue().toByteArray()), (short) d.scale())); + } else if (fName.equals("highValue")) { + BigDecimal d = new BigDecimal(value); + decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d + .unscaledValue().toByteArray()), (short) d.scale())); + } + } + statsData.setDecimalStats(decimalStats); + statsObj.setStatsData(statsData); + } else { + throw new 
IllegalArgumentException("Unsupported column type for statistics update: " + s); + } + + ColumnStatisticsDesc statsDesc 
= getColumnStatsDesc(dbName, tableName, + partName, partName == null); + ColumnStatistics colStats = new ColumnStatistics(); + colStats.setStatsDesc(statsDesc); + colStats.addToStatsObj(statsObj); + return colStats; + } + + // Case-insensitive, locale-independent prefix test; a null type never matches. + private static boolean hasTypePrefix(String type, String prefix) { + return type != null && type.regionMatches(true, 0, prefix, 0, prefix.length()); + } + + private ColumnStatisticsDesc getColumnStatsDesc(String dbName, + String tableName, String partName, boolean isTblLevel) { + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); + statsDesc.setDbName(dbName); + statsDesc.setTableName(tableName); + statsDesc.setIsTblLevel(isTblLevel); + if (!isTblLevel) { + statsDesc.setPartName(partName); + } else { + statsDesc.setPartName(null); + } + return statsDesc; + } + + private int persistTableStats() throws HiveException, MetaException, + IOException { + // Build the column statistics object from the properties carried by + // the work; nothing is computed from table data here, the values are + // taken as given. + ColumnStatistics colStats = constructColumnStatsFromInput(); + // Persist the column statistics object to the metastore + db.updateTableColumnStatistics(colStats); + return 0; + } + + private int persistPartitionStats() throws HiveException, MetaException, + IOException { + + // Build the column statistics object from the properties carried by + // the work (the partition name is part of the statistics descriptor). 
+ // The values are taken as given; nothing is computed from table data. + ColumnStatistics colStats = constructColumnStatsFromInput(); + // Persist the column statistics object to the metastore + db.updatePartitionColumnStatistics(colStats); + return 0; + } + + /** + * Persists the statistics at table or partition level. + * + * @return 0 on success, 1 if building or persisting the statistics failed + */ + @Override + public int execute(DriverContext driverContext) { + try { + if (work.getColStats().isTblLevel()) { + return persistTableStats(); + } else { + return persistPartitionStats(); + } + } catch (Exception e) { + LOG.error("Failed to persist column statistics", e); + } + return 1; + } + + @Override + public StageType getType() { + return StageType.COLUMNSTATS; + } + + @Override + public String getName() { + return "COLUMNSTATS UPDATE TASK"; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index 24dfed1..3d74459 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.io.merge.MergeWork; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.CopyWork; @@ -92,6 +93,7 @@ public TaskTuple(Class workClass, Class> taskClass) { StatsTask.class)); taskvec.add(new TaskTuple(StatsNoJobWork.class, StatsNoJobTask.class)); taskvec.add(new TaskTuple(ColumnStatsWork.class, ColumnStatsTask.class)); + taskvec.add(new TaskTuple(ColumnStatsUpdateWork.class, ColumnStatsUpdateTask.class)); taskvec.add(new TaskTuple(MergeWork.class, MergeTask.class)); taskvec.add(new 
TaskTuple(DependencyCollectionWork.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 4300145..0a37c62 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -13,6 +13,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 67a3aa7..14fa6c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -25,6 +25,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -57,6 +58,7 @@ import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ArchiveUtils; +import org.apache.hadoop.hive.ql.exec.ColumnStatsUpdateTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Task; @@ -91,6 +93,8 @@ import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition; import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; +import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import 
org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -276,6 +280,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { analyzeAlterTableClusterSort(ast, tableName, partSpec); } else if (ast.getToken().getType() == HiveParser.TOK_COMPACT) { analyzeAlterTableCompact(ast, tableName, partSpec); + } else if(ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS){ + analyzeAlterTableUpdateStats(ast,tblPart); } break; } @@ -378,6 +384,9 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { case HiveParser.TOK_ALTERTABLE_RENAME: analyzeAlterTableRename(ast, false); break; + case HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS: + analyzeAlterTableUpdateStats(ast); + break; case HiveParser.TOK_ALTERTABLE_TOUCH: analyzeAlterTableTouch(ast); break; @@ -507,6 +516,93 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } + /** + * Handles the table-level form of ALTER TABLE ... UPDATE STATISTICS FOR COLUMN. + * + * @throws SemanticException if the table cannot be loaded or has no such column + */ + private void analyzeAlterTableUpdateStats(ASTNode ast) + throws SemanticException { + String tblName = getUnescapedName((ASTNode) ast.getChild(0)); + String colName = getUnescapedName((ASTNode) ast.getChild(1)); + HashMap mapProp = getProps((ASTNode) (ast.getChild(2)) + .getChild(0)); + + Table tbl = null; + try { + tbl = db.getTable(tblName); + } catch (HiveException e) { + // Fail analysis here; continuing would dereference a null table below. 
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName), e); + } + + String colType = null; + List cols = tbl.getCols(); + for (FieldSchema col : cols) { + if (colName.equalsIgnoreCase(col.getName())) { + colType = col.getType(); + break; + } + } + if (colType == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(colName)); + } + + ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tblName, + Arrays.asList(new String[] { colName }), + Arrays.asList(new String[] { colType }), true); + ColumnStatsUpdateTask cStatsUpdateTask = (ColumnStatsUpdateTask) TaskFactory + .get(new ColumnStatsUpdateWork(cStatsDesc, null, mapProp), conf); + rootTasks.add(cStatsUpdateTask); + } + + /** + * Handles the partition-level form of ALTER TABLE ... PARTITION ... UPDATE STATISTICS FOR COLUMN. + * + * @throws SemanticException if the table cannot be loaded or has no such column + */ + private void 
analyzeAlterTableUpdateStats(ASTNode ast,TablePartition tblPart) + throws SemanticException { + String tblName = tblPart.tableName; + HashMap partSpec = tblPart.partSpec; + assert partSpec.size()==1; + String partName = null; + // NOTE(review): only one key=value pair is kept; multi-column partition specs are not handled. + for(Entry entry : partSpec.entrySet()){ + partName = entry.toString(); + } + String colName = getUnescapedName((ASTNode) ast.getChild(0)); + HashMap mapProp = getProps((ASTNode) (ast.getChild(1)) + .getChild(0)); + + Table tbl = null; + try { + tbl = db.getTable(tblName); + } catch (HiveException e) { + // Fail analysis here; continuing would dereference a null table below. + throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName), e); + } + + String colType = null; + List cols = tbl.getCols(); + for (FieldSchema col : cols) { + if (colName.equalsIgnoreCase(col.getName())) { + colType = col.getType(); + break; + } + } + if (colType == null) { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(colName)); + } + + ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tblName, + Arrays.asList(new String[] { colName }), + Arrays.asList(new String[] { colType }), false); + ColumnStatsUpdateTask cStatsUpdateTask = (ColumnStatsUpdateTask) TaskFactory + .get(new ColumnStatsUpdateWork(cStatsDesc, partName, mapProp), conf); + rootTasks.add(cStatsUpdateTask); + } + private void analyzeSetShowRole(ASTNode ast) throws SemanticException { switch (ast.getChildCount()) { case 0: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index ab1188a..287e144 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -146,6 +146,7 @@ TOK_ALTERTABLE_ARCHIVE; TOK_ALTERTABLE_UNARCHIVE; TOK_ALTERTABLE_SERDEPROPERTIES; TOK_ALTERTABLE_SERIALIZER; +TOK_ALTERTABLE_UPDATECOLSTATS; TOK_TABLE_PARTITION; 
TOK_ALTERTABLE_FILEFORMAT; TOK_ALTERTABLE_LOCATION; @@ -938,6 +939,7 @@ alterTableStatementSuffix : alterStatementSuffixRename | alterStatementSuffixAddCol | alterStatementSuffixRenameCol + | alterStatementSuffixUpdateStatsCol | alterStatementSuffixDropPartitions | 
alterStatementSuffixAddPartitions
     | alterStatementSuffixTouch
@@ -1030,6 +1032,13 @@ alterStatementSuffixRenameCol
     ->^(TOK_ALTERTABLE_RENAMECOL identifier $oldName $newName colType $comment? alterStatementChangeColPosition?)
     ;
 
+alterStatementSuffixUpdateStatsCol // table-level form: the leading identifier is the table name
+@init { pushMsg("update column statistics", state); }
+@after { popMsg(state); }
+    : identifier KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? // COLUMN keyword and COMMENT clause are optional
+    ->^(TOK_ALTERTABLE_UPDATECOLSTATS identifier $colName tableProperties $comment?) // children: table, column, properties[, comment]
+    ;
+
 alterStatementChangeColPosition
     : first=KW_FIRST|KW_AFTER afterCol=identifier
     ->{$first != null}? ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION )
@@ -1132,6 +1141,7 @@ alterTblPartitionStatementSuffix
     | alterStatementSuffixMergeFiles
     | alterStatementSuffixSerdeProperties
     | alterStatementSuffixRenamePart
+    | alterStatementSuffixStatsPart
     | alterStatementSuffixBucketNum
     | alterTblPartitionStatementSuffixSkewedLocation
     | alterStatementSuffixClusterbySortby
@@ -1223,6 +1233,13 @@ alterStatementSuffixRenamePart
     ->^(TOK_ALTERTABLE_RENAMEPART partitionSpec)
     ;
 
+alterStatementSuffixStatsPart // partition-level form: this suffix carries no table name or partition spec of its own
+@init { pushMsg("alter table stats partition statement", state); }
+@after { popMsg(state); }
+    : KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? // COLUMN keyword and COMMENT clause are optional
+    ->^(TOK_ALTERTABLE_UPDATECOLSTATS $colName tableProperties $comment?) // children: column, properties[, comment]
+    ;
+
 alterStatementSuffixMergeFiles
 @init { pushMsg("", state); }
 @after { popMsg(state); }
@@ -1302,6 +1319,7 @@ descStatement
     | (KW_DESCRIBE|KW_DESC) (KW_DATABASE|KW_SCHEMA) KW_EXTENDED? (dbName=identifier) -> ^(TOK_DESCDATABASE $dbName KW_EXTENDED?)
;
 
+
 analyzeStatement
 @init { pushMsg("analyze statement", state); }
 @after { popMsg(state); }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
index 268920a..3251b72 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
@@ -231,9 +231,12 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree)
       case HiveParser.TOK_TRUNCATETABLE:
       case HiveParser.TOK_EXCHANGEPARTITION:
       case HiveParser.TOK_SHOW_SET_ROLE:
-
+      case HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS:
         return new DDLSemanticAnalyzer(conf);
       case HiveParser.TOK_ALTERTABLE_PARTITION:
+        if (tree.getChildCount() == 2
+            && ((ASTNode) tree.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_UPDATECOLSTATS)
+          return new DDLSemanticAnalyzer(conf);
         HiveOperation commandType = null;
         Integer type = ((ASTNode) tree.getChild(1)).getToken().getType();
         if (tree.getChild(0).getChildCount() > 1) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java
new file mode 100644
index 0000000..2134ade
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.HashMap;
+
+/**
+ * Plan descriptor for a column statistics update: holds the column statistics
+ * descriptor, an optional partition name and a map of properties.
+ */
+@Explain(displayName = "Column Stats Update Work")
+public class ColumnStatsUpdateWork implements Serializable {
+  private static final long serialVersionUID = 1L;
+  private static final int LIMIT = -1;
+
+  private ColumnStatsDesc colStats;
+  private String partName;
+  private HashMap<String, String> mapProp;
+
+  /** No-argument constructor; every field starts out as {@code null}. */
+  public ColumnStatsUpdateWork() {
+  }
+
+  /**
+   * @param colStats descriptor of the column statistics being updated
+   * @param partName partition name; may be {@code null}
+   * @param mapProp properties to apply (presumably statistic name to value; confirm with the task)
+   */
+  public ColumnStatsUpdateWork(ColumnStatsDesc colStats, String partName,
+      HashMap<String, String> mapProp) {
+    // Assign fields directly: calling overridable setters from a constructor
+    // would run subclass code on a partially constructed object.
+    this.colStats = colStats;
+    this.partName = partName;
+    this.mapProp = mapProp;
+  }
+
+  /** Returns a description of this work; never {@code null}. */
+  @Override
+  public String toString() {
+    return "ColumnStatsUpdateWork [colStats=" + colStats + ", partName=" + partName
+        + ", mapProp=" + mapProp + "]";
+  }
+
+  @Explain(displayName = "Column Stats Desc")
+  public ColumnStatsDesc getColStats() {
+    return colStats;
+  }
+
+  public String getPartName() {
+    return partName;
+  }
+
+  public void setPartName(String partName) {
+    this.partName = partName;
+  }
+
+  public void setColStats(ColumnStatsDesc colStats) {
+    this.colStats = colStats;
+  }
+
+  public HashMap<String, String> getMapProp() {
+    return mapProp;
+  }
+
+  public void setMapProp(HashMap<String, String> mapProp) {
+    this.mapProp = mapProp;
+  }
+
+  /** Returns the fixed limit constant, {@code -1}. */
+  public static int getLimit() {
+    return LIMIT;
+  }
+
+}