diff --git common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 25c7508..7599bd8 100644 --- common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -17,19 +17,34 @@ */ package org.apache.hadoop.hive.common; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.json.JSONException; -import org.json.JSONObject; - -import java.util.LinkedHashMap; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.TreeMap; +import javax.management.RuntimeErrorException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonRawValue; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + /** * A class that defines the constant strings used by the statistics implementation. @@ -144,35 +159,61 @@ public String getAggregator(Configuration conf) { public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] { COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS}; + private static class ColumnStatsAccurate { + private static ObjectReader objectReader; + private static ObjectWriter objectWriter; + + static { + ObjectMapper objectMapper = new ObjectMapper(); + objectReader = objectMapper.readerFor(ColumnStatsAccurate.class); + objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class); + } + + static class BooleanSerializer extends JsonSerializer { + + @Override + public void serialize(Boolean value, JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) throws IOException, JsonProcessingException { + jsonGenerator.writeString(value.toString()); + } + } + + static class BooleanDeserializer extends JsonDeserializer { + + public Boolean deserialize(JsonParser jsonParser, + DeserializationContext deserializationContext) + throws IOException, JsonProcessingException { + return Boolean.valueOf(jsonParser.getValueAsString()); + } + } + + @JsonSerialize(using = BooleanSerializer.class) + @JsonDeserialize(using = BooleanDeserializer.class) + @JsonProperty(BASIC_STATS) + boolean basicStats; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @JsonProperty(COLUMN_STATS) + @JsonSerialize(contentUsing = BooleanSerializer.class) + @JsonDeserialize(contentUsing = BooleanDeserializer.class) + TreeMap columnStats = new TreeMap<>(); + + }; + public static boolean areBasicStatsUptoDate(Map params) { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - if (stats != null && stats.has(BASIC_STATS)) { - return true; - } else { + if (params == null) { return false; } + ColumnStatsAccurate stats = parseStatsAcc2(params.get(COLUMN_STATS_ACCURATE)); + return stats.basicStats; } public static boolean areColumnStatsUptoDate(Map params, String colName) { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - try { - if (!stats.has(COLUMN_STATS)) { - return false; - } else { - JSONObject columns = stats.getJSONObject(COLUMN_STATS); - if (columns != null && columns.has(colName)) { - return true; - } else { - return false; - } - } - } catch (JSONException e) { - // For backward compatibility, if previous value can not be parsed to a - // json object, it will come here. - LOG.debug("In StatsSetupConst, JsonParser can not parse COLUMN_STATS."); + if (params == null) { return false; } - + ColumnStatsAccurate stats = parseStatsAcc2(params.get(COLUMN_STATS_ACCURATE)); + return stats.columnStats.containsKey(colName); } // It will only throw JSONException when stats.put(BASIC_STATS, TRUE) @@ -180,55 +221,52 @@ public static boolean areColumnStatsUptoDate(Map params, String // note that set basic stats false will wipe out column stats too. public static void setBasicStatsState(Map params, String setting) { if (setting.equals(FALSE)) { - if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) { + if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) { params.remove(COLUMN_STATS_ACCURATE); } - } else { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - - try { - stats.put(BASIC_STATS, TRUE); - } catch (JSONException e) { - // impossible to throw any json exceptions. - LOG.trace(e.getMessage()); - } - params.put(COLUMN_STATS_ACCURATE, stats.toString()); + return; + } + if (params == null) { + throw new RuntimeException("params are null...cant set columnstatstate!"); + } + ColumnStatsAccurate stats = parseStatsAcc2(params.get(COLUMN_STATS_ACCURATE)); + stats.basicStats = true; + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + throw new RuntimeException("can't serialize column stats", e); } } public static void setColumnStatsState(Map params, List colNames) { - try { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + if (params == null) { + throw new RuntimeException("params are null...cant set columnstatstate!"); + } + ColumnStatsAccurate stats = parseStatsAcc2(params.get(COLUMN_STATS_ACCURATE)); - JSONObject colStats; - if (stats.has(COLUMN_STATS)) { - colStats = stats.getJSONObject(COLUMN_STATS); - } else { - colStats = new JSONObject(new TreeMap()); - } - for (String colName : colNames) { - if (!colStats.has(colName)) { - colStats.put(colName, TRUE); - } + for (String colName : colNames) { + if (!stats.columnStats.containsKey(colName)) { + stats.columnStats.put(colName, true); } - stats.put(COLUMN_STATS, colStats); - params.put(COLUMN_STATS_ACCURATE, stats.toString()); - } catch (JSONException e) { - // impossible to throw any json exceptions. + } + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { LOG.trace(e.getMessage()); } } public static void clearColumnStatsState(Map params) { - String statsAcc; - if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) { - // statsAcc may not be jason format, which will throw exception - JSONObject stats = parseStatsAcc(statsAcc); - - if (stats.has(COLUMN_STATS)) { - stats.remove(COLUMN_STATS); - } - params.put(COLUMN_STATS_ACCURATE, stats.toString()); + if (params == null) { + return; + } + ColumnStatsAccurate stats = parseStatsAcc2(params.get(COLUMN_STATS_ACCURATE)); + stats.columnStats.clear(); + + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + LOG.trace(e.getMessage()); } } @@ -241,34 +279,24 @@ public static void setBasicStatsStateForCreateTable(Map params, setBasicStatsState(params, setting); } - private static JSONObject parseStatsAcc(String statsAcc) { - if (statsAcc == null) { - return new JSONObject(new LinkedHashMap()); - } else { - try { - return new JSONObject(statsAcc); - } catch (JSONException e) { - return statsAccUpgrade(statsAcc); - } + private static ColumnStatsAccurate parseStatsAcc2(String statsAcc) { + if(statsAcc==null){ + return new ColumnStatsAccurate(); } - } - - private static JSONObject statsAccUpgrade(String statsAcc) { - JSONObject stats; - // old format of statsAcc, e.g., TRUE or FALSE - LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc."); - stats = new JSONObject(new LinkedHashMap()); - try { - if (statsAcc.equals(TRUE)) { - stats.put(BASIC_STATS, TRUE); - } else { - stats.put(BASIC_STATS, FALSE); - } - } catch (JSONException e1) { - // impossible to throw any json exceptions. - LOG.trace(e1.getMessage()); - } - return stats; - } + if(statsAcc.startsWith("{")){ + try { + return ColumnStatsAccurate.objectReader.readValue(statsAcc); + } catch (Exception e) { + /* this is a real error */ + throw new RuntimeException("can't parse statsAcc",e); + } + }else{ + ColumnStatsAccurate ret = new ColumnStatsAccurate(); + if("TRUE".equalsIgnoreCase(statsAcc)){ + ret.basicStats=true; + } + return ret; + } + } } diff --git common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java index 7a7ad42..88c725a 100644 --- common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java +++ common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java @@ -53,4 +53,40 @@ public void testSetBasicStatsState_none() { assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); } + // earlier implementation have quoted boolean values...so the new implementation should preserve this + @Test + public void testStatColumnEntriesCompat() { + Map params0=new HashMap<>(); + StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo")); + + assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } + + @Test + public void testColumnEntries_orderIndependence() { + Map params0=new HashMap<>(); + StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo","Bar")); + Map params1=new HashMap<>(); + StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar","Foo")); + StatsSetupConst.setBasicStatsState(params1, String.valueOf(true)); + + assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } + + @Test + public void testColumnEntries_orderIndependence2() { + Map params0=new HashMap<>(); + // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order + StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year")); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year","month")); + Map params1=new HashMap<>(); + StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month","year")); + StatsSetupConst.setBasicStatsState(params1, String.valueOf(true)); + + System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } }