diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 01e6010..59229da 100644 --- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -19,19 +19,20 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; -import java.util.LinkedHashMap; +import java.util.ArrayList; import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** - * A class that defines the constant strings used by the statistics implementation. + * A class that defines the constant strings used by the statistics + * implementation. */ public class StatsSetupConst { @@ -53,12 +54,16 @@ public String getAggregator(Configuration conf) { custom { @Override public String getPublisher(Configuration conf) { - return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_DEFAULT_PUBLISHER); } + return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_DEFAULT_PUBLISHER); + } + @Override public String getAggregator(Configuration conf) { - return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_DEFAULT_AGGREGATOR); } + return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_STATS_DEFAULT_AGGREGATOR); + } }; public abstract String getPublisher(Configuration conf); + public abstract String getAggregator(Configuration conf); } @@ -97,18 +102,20 @@ public String getAggregator(Configuration conf) { /** * @return List of all supported statistics */ - public static final String[] supportedStats = {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE}; + public static final String[] supportedStats = { NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE }; /** - * @return List of all statistics that need to be collected during query execution. 
These are - * statistics that inherently require a scan of the data. + * @return List of all statistics that need to be collected during query + * execution. These are statistics that inherently require a scan of + * the data. */ - public static final String[] statsRequireCompute = new String[] {ROW_COUNT,RAW_DATA_SIZE}; + public static final String[] statsRequireCompute = new String[] { ROW_COUNT, RAW_DATA_SIZE }; /** - * @return List of statistics that can be collected quickly without requiring a scan of the data. + * @return List of statistics that can be collected quickly without requiring + * a scan of the data. */ - public static final String[] fastStats = new String[] {NUM_FILES,TOTAL_SIZE}; + public static final String[] fastStats = new String[] { NUM_FILES, TOTAL_SIZE }; // This string constant is used to indicate to AlterHandler that // alterPartition/alterTable is happening via statsTask or via user. @@ -118,13 +125,17 @@ public String getAggregator(Configuration conf) { public static final String USER = "USER"; - // This string constant is used by AlterHandler to figure out that it should not attempt to - // update stats. It is set by any client-side task which wishes to signal that no stats + // This string constant is used by AlterHandler to figure out that it should + // not attempt to + // update stats. It is set by any client-side task which wishes to signal that + // no stats // update should take place, such as with replication. public static final String DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"; - //This string constant will be persisted in metastore to indicate whether corresponding - //table or partition's statistics and table or partition's column statistics are accurate or not. + // This string constant will be persisted in metastore to indicate whether + // corresponding + // table or partition's statistics and table or partition's column statistics + // are accurate or not. 
public static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE"; public static final String COLUMN_STATS = "COLUMN_STATS"; @@ -137,23 +148,28 @@ public String getAggregator(Configuration conf) { public static final String FALSE = "false"; - // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output. - public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] { - COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS}; + // The parameter keys for the table statistics. Those keys are excluded from + // 'show create table' command output. + public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] { COLUMN_STATS_ACCURATE, + NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS }; public static boolean areBasicStatsUptoDate(Map params) { String statsAcc = params.get(COLUMN_STATS_ACCURATE); if (statsAcc == null) { return false; } else { - JSONObject jsonObj; + JSONArray jsonArray; try { - jsonObj = new JSONObject(statsAcc); - if (jsonObj != null && jsonObj.has(BASIC_STATS)) { - return true; - } else { - return false; + jsonArray = new JSONArray(statsAcc); + if (jsonArray != null) { + for (int index = 0; index < jsonArray.length(); index++) { + JSONObject jsonObj = jsonArray.getJSONObject(index); + if (jsonObj.has(BASIC_STATS)) { + return true; + } + } } + return false; } catch (JSONException e) { // For backward compatibility, if previous value can not be parsed to a // json object, it will come here. 
@@ -175,19 +191,25 @@ public static boolean areColumnStatsUptoDate(Map params, String if (statsAcc == null) { return false; } else { - JSONObject jsonObj; + JSONArray jsonArray; try { - jsonObj = new JSONObject(statsAcc); - if (jsonObj == null || !jsonObj.has(COLUMN_STATS)) { - return false; - } else { - JSONObject columns = jsonObj.getJSONObject(COLUMN_STATS); - if (columns != null && columns.has(colName)) { - return true; - } else { - return false; + jsonArray = new JSONArray(statsAcc); + if (jsonArray != null) { + for (int index = 0; index < jsonArray.length(); index++) { + JSONObject jsonObj = jsonArray.getJSONObject(index); + if (jsonObj.has(COLUMN_STATS)) { + JSONArray columns = jsonObj.getJSONArray(COLUMN_STATS); + for (int c = 0; c < columns.length(); c++) { + JSONObject cObj = columns.getJSONObject(c); + if (cObj != null && cObj.has(colName)) { + return true; + } + } + return false; + } } } + return false; } catch (JSONException e) { // For backward compatibility, if previous value can not be parsed to a // json object, it will come here. @@ -208,29 +230,46 @@ public static void setBasicStatsState(Map params, String setting } else { String statsAcc = params.get(COLUMN_STATS_ACCURATE); if (statsAcc == null) { - JSONObject stats = new JSONObject(new LinkedHashMap()); + JSONArray stats = new JSONArray(); // duplicate key is not possible try { - stats.put(BASIC_STATS, TRUE); + JSONObject obj = new JSONObject(); + obj.put(BASIC_STATS, TRUE); + stats.put(obj); } catch (JSONException e) { // impossible to throw any json exceptions. 
LOG.trace(e.getMessage()); } params.put(COLUMN_STATS_ACCURATE, stats.toString()); } else { - // statsAcc may not be jason format, which will throw exception - JSONObject stats; + // statsAcc may not be json format, which will throw exception + JSONArray stats; try { - stats = new JSONObject(statsAcc); + stats = new JSONArray(statsAcc); } catch (JSONException e) { // old format of statsAcc, e.g., TRUE or FALSE LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc."); - stats = new JSONObject(new LinkedHashMap()); + stats = new JSONArray(); + } + boolean basicStats = false; + for (int index = 0; index < stats.length(); index++) { + try { + JSONObject jsonObj = stats.getJSONObject(index); + if (jsonObj.has(BASIC_STATS)) { + basicStats = true; + break; + } + } catch (JSONException e) { + // impossible to throw any json exceptions. + LOG.trace(e.getMessage()); + } } - if (!stats.has(BASIC_STATS)) { + if (!basicStats) { // duplicate key is not possible try { - stats.put(BASIC_STATS, TRUE); + JSONObject obj = new JSONObject(); + obj.put(BASIC_STATS, TRUE); + stats.put(obj); } catch (JSONException e) { // impossible to throw any json exceptions. 
LOG.trace(e.getMessage()); @@ -244,56 +283,87 @@ public static void setBasicStatsState(Map params, String setting public static void setColumnStatsState(Map params, List colNames) { try { String statsAcc = params.get(COLUMN_STATS_ACCURATE); - JSONObject colStats = new JSONObject(new LinkedHashMap()); + JSONArray colStats = new JSONArray(); // duplicate key is not possible for (String colName : colNames) { - colStats.put(colName.toLowerCase(), TRUE); + JSONObject cobj = new JSONObject(); + cobj.put(colName.toLowerCase(), TRUE); + colStats.put(cobj); } if (statsAcc == null) { - JSONObject stats = new JSONObject(new LinkedHashMap()); + JSONArray stats = new JSONArray(); // duplicate key is not possible - stats.put(COLUMN_STATS, colStats); + JSONObject cobjs = new JSONObject(); + cobjs.put(COLUMN_STATS, colStats); + stats.put(cobjs); params.put(COLUMN_STATS_ACCURATE, stats.toString()); } else { - // statsAcc may not be jason format, which will throw exception - JSONObject stats; + // statsAcc may not be json format, which will throw exception + JSONArray stats; try { - stats = new JSONObject(statsAcc); + stats = new JSONArray(statsAcc); } catch (JSONException e) { // old format of statsAcc, e.g., TRUE or FALSE LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc."); - stats = new JSONObject(new LinkedHashMap()); + stats = new JSONArray(); try { if (statsAcc.equals(TRUE)) { - stats.put(BASIC_STATS, TRUE); - } else { - stats.put(BASIC_STATS, FALSE); + JSONObject obj = new JSONObject(); + obj.put(BASIC_STATS, TRUE); + stats.put(obj); } } catch (JSONException e1) { // impossible to throw any json exceptions. 
LOG.trace(e1.getMessage()); } } - if (!stats.has(COLUMN_STATS)) { + int columnStatsIndex = -1; + for (int index = 0; index < stats.length(); index++) { + try { + JSONObject jsonObj = stats.getJSONObject(index); + if (jsonObj.has(COLUMN_STATS)) { + columnStatsIndex = index; + break; + } + } catch (JSONException e) { + // impossible to throw any json exceptions. + LOG.trace(e.getMessage()); + } + } + if (columnStatsIndex == -1) { // duplicate key is not possible - stats.put(COLUMN_STATS, colStats); + JSONObject jsonObj = new JSONObject(); + jsonObj.put(COLUMN_STATS, colStats); + stats.put(jsonObj); } else { - // getJSONObject(COLUMN_STATS) should be found. - JSONObject allColumnStats = stats.getJSONObject(COLUMN_STATS); + // The entry at columnStatsIndex is the COLUMN_STATS wrapper object; unwrap its array. + JSONArray allColumnStats = stats.getJSONObject(columnStatsIndex).getJSONArray(COLUMN_STATS); + List missing = new ArrayList<>(); for (String colName : colNames) { - if (!allColumnStats.has(colName)) { - // duplicate key is not possible - allColumnStats.put(colName, TRUE); + boolean hasColumn = false; + for (int index = 0; index < allColumnStats.length(); index++) { + JSONObject jsonObj = allColumnStats.getJSONObject(index); + if (jsonObj.has(colName.toLowerCase())) { + hasColumn = true; + break; + } + } + if (!hasColumn) { + missing.add(colName); } } - stats.remove(COLUMN_STATS); + for (String colName : missing) { + JSONObject jsonObj = new JSONObject(); + jsonObj.put(colName.toLowerCase(), TRUE); + allColumnStats.put(jsonObj); + } // duplicate key is not possible - stats.put(COLUMN_STATS, allColumnStats); + stats.getJSONObject(columnStatsIndex).put(COLUMN_STATS, allColumnStats); } params.put(COLUMN_STATS_ACCURATE, stats.toString()); } } catch (JSONException e) { - //impossible to throw any json exceptions. + // impossible to throw any json exceptions. 
LOG.trace(e.getMessage()); } } @@ -301,28 +371,34 @@ public static void setColumnStatsState(Map params, List public static void clearColumnStatsState(Map params) { String statsAcc; if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) { - // statsAcc may not be jason format, which will throw exception - JSONObject stats; + // statsAcc may not be json format, which will throw exception + JSONArray stats; try { - stats = new JSONObject(statsAcc); + stats = new JSONArray(statsAcc); } catch (JSONException e) { // old format of statsAcc, e.g., TRUE or FALSE LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc."); - stats = new JSONObject(new LinkedHashMap()); + stats = new JSONArray(); try { if (statsAcc.equals(TRUE)) { - stats.put(BASIC_STATS, TRUE); - } else { - stats.put(BASIC_STATS, FALSE); + JSONObject obj = new JSONObject(); + obj.put(BASIC_STATS, TRUE); + stats.put(obj); } } catch (JSONException e1) { // impossible to throw any json exceptions. LOG.trace(e1.getMessage()); } } - if (stats.has(COLUMN_STATS)) { - stats.remove(COLUMN_STATS); - } + // Drop the COLUMN_STATS entry and keep every other element, such as BASIC_STATS. + JSONArray remaining = new JSONArray(); + for (int index = 0; index < stats.length(); index++) { + JSONObject jsonObj = stats.optJSONObject(index); + if (jsonObj == null || !jsonObj.has(COLUMN_STATS)) { + remaining.put(stats.opt(index)); + } + } + stats = remaining; params.put(COLUMN_STATS_ACCURATE, stats.toString()); } }