diff --git common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 25c7508..2043c39 100644 --- common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -17,19 +17,31 @@ */ package org.apache.hadoop.hive.common; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.json.JSONException; -import org.json.JSONObject; - -import java.util.LinkedHashMap; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.TreeMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + /** * A class that defines the constant strings used by the statistics implementation. @@ -144,35 +156,62 @@ public String getAggregator(Configuration conf) { public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] { COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS}; + private static class ColumnStatsAccurate { + private static ObjectReader objectReader; + private static ObjectWriter objectWriter; + + static { + ObjectMapper objectMapper = new ObjectMapper(); + objectReader = objectMapper.readerFor(ColumnStatsAccurate.class); + objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class); + } + + static class BooleanSerializer extends JsonSerializer { + + @Override + public void serialize(Boolean value, JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) throws IOException, JsonProcessingException { + jsonGenerator.writeString(value.toString()); + } + } + + static class BooleanDeserializer extends JsonDeserializer { + + public Boolean deserialize(JsonParser jsonParser, + DeserializationContext deserializationContext) + throws IOException, JsonProcessingException { + return Boolean.valueOf(jsonParser.getValueAsString()); + } + } + + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @JsonSerialize(using = BooleanSerializer.class) + @JsonDeserialize(using = BooleanDeserializer.class) + @JsonProperty(BASIC_STATS) + boolean basicStats; + + @JsonInclude(JsonInclude.Include.NON_EMPTY) + @JsonProperty(COLUMN_STATS) + @JsonSerialize(contentUsing = BooleanSerializer.class) + @JsonDeserialize(contentUsing = BooleanDeserializer.class) + TreeMap columnStats = new TreeMap<>(); + + }; + public static boolean areBasicStatsUptoDate(Map params) { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - if (stats != null && stats.has(BASIC_STATS)) { - return true; - } else { + if (params == null) { return false; } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.basicStats; } public static boolean areColumnStatsUptoDate(Map params, String colName) { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - try { - if (!stats.has(COLUMN_STATS)) { - return false; - } else { - JSONObject columns = stats.getJSONObject(COLUMN_STATS); - if (columns != null && columns.has(colName)) { - return true; - } else { - return false; - } - } - } catch (JSONException e) { - // For backward compatibility, if previous value can not be parsed to a - // json object, it will come here. - LOG.debug("In StatsSetupConst, JsonParser can not parse COLUMN_STATS."); + if (params == null) { return false; } - + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.columnStats.containsKey(colName); } // It will only throw JSONException when stats.put(BASIC_STATS, TRUE) @@ -180,55 +219,52 @@ public static boolean areColumnStatsUptoDate(Map params, String // note that set basic stats false will wipe out column stats too. public static void setBasicStatsState(Map params, String setting) { if (setting.equals(FALSE)) { - if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) { + if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) { params.remove(COLUMN_STATS_ACCURATE); } - } else { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - - try { - stats.put(BASIC_STATS, TRUE); - } catch (JSONException e) { - // impossible to throw any json exceptions. - LOG.trace(e.getMessage()); - } - params.put(COLUMN_STATS_ACCURATE, stats.toString()); + return; + } + if (params == null) { + throw new RuntimeException("params are null...cant set columnstatstate!"); + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + stats.basicStats = true; + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + throw new RuntimeException("can't serialize column stats", e); } } public static void setColumnStatsState(Map params, List colNames) { - try { - JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + if (params == null) { + throw new RuntimeException("params are null...cant set columnstatstate!"); + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - JSONObject colStats; - if (stats.has(COLUMN_STATS)) { - colStats = stats.getJSONObject(COLUMN_STATS); - } else { - colStats = new JSONObject(new TreeMap()); + for (String colName : colNames) { + if (!stats.columnStats.containsKey(colName)) { + stats.columnStats.put(colName, true); } - for (String colName : colNames) { - if (!colStats.has(colName)) { - colStats.put(colName, TRUE); - } - } - stats.put(COLUMN_STATS, colStats); - params.put(COLUMN_STATS_ACCURATE, stats.toString()); - } catch (JSONException e) { - // impossible to throw any json exceptions. + } + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { LOG.trace(e.getMessage()); } } public static void clearColumnStatsState(Map params) { - String statsAcc; - if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) { - // statsAcc may not be jason format, which will throw exception - JSONObject stats = parseStatsAcc(statsAcc); - - if (stats.has(COLUMN_STATS)) { - stats.remove(COLUMN_STATS); - } - params.put(COLUMN_STATS_ACCURATE, stats.toString()); + if (params == null) { + return; + } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + stats.columnStats.clear(); + + try { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } catch (JsonProcessingException e) { + LOG.trace(e.getMessage()); } } @@ -241,34 +277,18 @@ public static void setBasicStatsStateForCreateTable(Map params, setBasicStatsState(params, setting); } - private static JSONObject parseStatsAcc(String statsAcc) { + private static ColumnStatsAccurate parseStatsAcc(String statsAcc) { if (statsAcc == null) { - return new JSONObject(new LinkedHashMap()); - } else { - try { - return new JSONObject(statsAcc); - } catch (JSONException e) { - return statsAccUpgrade(statsAcc); - } + return new ColumnStatsAccurate(); } - } - - private static JSONObject statsAccUpgrade(String statsAcc) { - JSONObject stats; - // old format of statsAcc, e.g., TRUE or FALSE - LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc."); - stats = new JSONObject(new LinkedHashMap()); try { - if (statsAcc.equals(TRUE)) { - stats.put(BASIC_STATS, TRUE); - } else { - stats.put(BASIC_STATS, FALSE); + return ColumnStatsAccurate.objectReader.readValue(statsAcc); + } catch (Exception e) { + ColumnStatsAccurate ret = new ColumnStatsAccurate(); + if (TRUE.equalsIgnoreCase(statsAcc)) { + ret.basicStats = true; } - } catch (JSONException e1) { - // impossible to throw any json exceptions. - LOG.trace(e1.getMessage()); + return ret; } - return stats; } - } diff --git common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java index 7a7ad42..792b862 100644 --- common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java +++ common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.common; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import java.util.HashMap; import java.util.Map; @@ -53,4 +54,57 @@ public void testSetBasicStatsState_none() { assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); } + @Test + public void testSetBasicStatsState_falseIsAbsent() { + Map params=new HashMap<>(); + StatsSetupConst.setBasicStatsState(params, String.valueOf(true)); + StatsSetupConst.setBasicStatsState(params, String.valueOf(false)); + assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } + + // earlier implementation have quoted boolean values...so the new implementation should preserve this + @Test + public void testStatColumnEntriesCompat() { + Map params0=new HashMap<>(); + StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo")); + + assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } + + @Test + public void testColumnEntries_orderIndependence() { + Map params0=new HashMap<>(); + StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo","Bar")); + Map params1=new HashMap<>(); + StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar","Foo")); + StatsSetupConst.setBasicStatsState(params1, String.valueOf(true)); + + assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } + + @Test + public void testColumnEntries_orderIndependence2() { + Map params0=new HashMap<>(); + // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order + StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year")); + StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year","month")); + Map params1=new HashMap<>(); + StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month","year")); + StatsSetupConst.setBasicStatsState(params1, String.valueOf(true)); + + System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } + + // FIXME: current objective is to keep the previous outputs...but this is possibly bad.. + @Test + public void testColumnEntries_areKept_whenBasicIsAbsent() { + Map params=new HashMap<>(); + StatsSetupConst.setBasicStatsState(params, String.valueOf(false)); + StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo")); + assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); + } }