diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index a1ec52fa89..fcaa83e6b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -22,6 +22,7 @@
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -37,6 +38,7 @@
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan;
 import org.apache.hadoop.hive.metastore.api.WMMapping;
 import org.apache.hadoop.hive.metastore.api.WMPool;
@@ -56,6 +58,7 @@
 import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hive.common.util.HiveStringUtils;
 
 import com.google.common.collect.Lists;
@@ -112,6 +115,16 @@ private static String convertToString(org.apache.hadoop.hive.metastore.api.Date
     return writableValue.toString();
   }
 
+  private static String convertToString(org.apache.hadoop.hive.metastore.api.Timestamp val) {
+    if (val == null) {
+      return "";
+    }
+
+    TimestampWritableV2 writableValue = new TimestampWritableV2(
+        Timestamp.ofEpochSecond(val.getSecondsSinceEpoch()));
+    return writableValue.toString();
+  }
+
   private static String convertToString(byte[] buf) {
     if (buf == null || buf.length == 0) {
       return "";
@@ -697,6 +710,13 @@ static void formatOutput(String name, String value, StringBuilder tableInfo,
           "", "",
           "", "",
           convertToString(dcsd.getBitVectors())));
+    } else if (csd.isSetTimestampStats()) {
+      TimestampColumnStatsData tcsd = csd.getTimestampStats();
+      ret.addAll(Lists.newArrayList(convertToString(tcsd.getLowValue()), convertToString(tcsd.getHighValue()),
+          "" + tcsd.getNumNulls(), "" + tcsd.getNumDVs(),
+          "", "",
+          "", "",
+          convertToString(tcsd.getBitVectors())));
     }
     // @formatter:on
   } else {
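The new convertToString overload is what lets DESCRIBE FORMATTED render the min/max of a timestamp column: the metastore struct carries bare seconds since the epoch, and TimestampWritableV2 turns them back into a readable timestamp string. A minimal sketch of that round trip, assuming Hive's org.apache.hadoop.hive.common.type.Timestamp API (the class name below is illustrative, not part of the patch):

    import org.apache.hadoop.hive.common.type.Timestamp;
    import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;

    public class TimestampStatsRendering {
      public static void main(String[] args) {
        long secondsSinceEpoch = 1420070400L;                      // value held by the metastore struct
        Timestamp ts = Timestamp.ofEpochSecond(secondsSinceEpoch); // metastore seconds -> Hive timestamp
        String rendered = new TimestampWritableV2(ts).toString();  // what DESCRIBE FORMATTED prints
        System.out.println(rendered);                              // 2015-01-01 00:00:00
      }
    }
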
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 4fc73620fe..2abe1ea98e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -20,6 +20,7 @@
 import java.lang.reflect.Field;
 
 import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -32,6 +33,7 @@
 import java.util.Set;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.Context;
@@ -108,6 +110,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -625,6 +628,13 @@ public RangeResult intersect(ExprNodeDesc exprNode) {
         int minValue = range.minValue.intValue();
         return RangeResult.of(value < minValue, value < maxValue, value == minValue, value == maxValue);
       }
+      case serdeConstants.TIMESTAMP_TYPE_NAME: {
+        TimestampWritableV2 timestampWritable = new TimestampWritableV2(Timestamp.valueOf(boundValue));
+        long value = timestampWritable.getTimestamp().toEpochSecond();
+        long maxValue = range.maxValue.longValue();
+        long minValue = range.minValue.longValue();
+        return RangeResult.of(value < minValue, value < maxValue, value == minValue, value == maxValue);
+      }
       case serdeConstants.BIGINT_TYPE_NAME: {
         long value = Long.parseLong(boundValue);
         long maxValue = range.maxValue.longValue();
@@ -967,6 +977,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minValue > value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (value - minValue) / (maxValue - minValue)) * numRows);
       } else {
         if (minValue >= value) {
           return numRows;
@@ -974,6 +987,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxValue < value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows);
       }
     } else if (colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
       short value = Short.parseShort(boundValue);
@@ -986,6 +1002,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minValue > value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (value - minValue) / (maxValue - minValue)) * numRows);
       } else {
         if (minValue >= value) {
           return numRows;
@@ -993,11 +1012,14 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxValue < value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows);
       }
     } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) ||
         colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       int value;
-      if (colTypeLowerCase == serdeConstants.DATE_TYPE_NAME) {
+      if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
         DateWritable writableVal = new DateWritable(java.sql.Date.valueOf(boundValue));
         value = writableVal.getDays();
       } else {
@@ -1013,6 +1035,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minValue > value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (value - minValue) / (maxValue - minValue)) * numRows);
       } else {
         if (minValue >= value) {
           return numRows;
@@ -1020,9 +1045,19 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxValue < value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows);
+      }
+    } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME) ||
+        colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+      long value;
+      if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+        TimestampWritableV2 timestampWritable = new TimestampWritableV2(Timestamp.valueOf(boundValue));
+        value = timestampWritable.getTimestamp().toEpochSecond();
+      } else {
+        value = Long.parseLong(boundValue);
       }
-    } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
-      long value = Long.parseLong(boundValue);
       long maxValue = cs.getRange().maxValue.longValue();
       long minValue = cs.getRange().minValue.longValue();
       if (upperBound) {
@@ -1032,6 +1067,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minValue > value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (value - minValue) / (maxValue - minValue)) * numRows);
       } else {
         if (minValue >= value) {
           return numRows;
@@ -1039,6 +1077,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxValue < value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows);
       }
     } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
       float value = Float.parseFloat(boundValue);
@@ -1051,6 +1092,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minValue > value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (value - minValue) / (maxValue - minValue)) * numRows);
       } else {
         if (minValue >= value) {
           return numRows;
@@ -1058,6 +1102,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxValue < value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows);
       }
     } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
       double value = Double.parseDouble(boundValue);
@@ -1070,6 +1117,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minValue > value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (value - minValue) / (maxValue - minValue)) * numRows);
       } else {
         if (minValue >= value) {
           return numRows;
@@ -1077,6 +1127,9 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxValue < value) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows);
       }
     } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
       BigDecimal value = new BigDecimal(boundValue);
@@ -1091,6 +1144,12 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (minComparison < 0) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(
+            ((value.subtract(minValue)).divide(maxValue.subtract(minValue), RoundingMode.UP))
+                .multiply(BigDecimal.valueOf(numRows))
+                .doubleValue());
       } else {
         if (minComparison <= 0) {
           return numRows;
@@ -1098,6 +1157,12 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E
         if (maxComparison > 0) {
           return 0;
         }
+        // Assuming a uniform distribution, we can use the range to calculate
+        // a new estimate for the number of rows
+        return Math.round(
+            ((maxValue.subtract(value)).divide(maxValue.subtract(minValue), RoundingMode.UP))
+                .multiply(BigDecimal.valueOf(numRows))
+                .doubleValue());
       }
     }
   } catch (NumberFormatException nfe) {
@@ -1564,6 +1629,7 @@ private static void computeAggregateColumnMinMax(ColStatistics cs, HiveConf conf
     case serdeConstants.DATE_TYPE_NAME:
     case serdeConstants.INT_TYPE_NAME:
     case serdeConstants.BIGINT_TYPE_NAME:
+    case serdeConstants.TIMESTAMP_TYPE_NAME:
       long maxValueLong = range.maxValue.longValue();
      long minValueLong = range.minValue.longValue();
      // If min value is less or equal to max value (legal)
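Every branch above applies the same linear interpolation: the predicate's bound is mapped into the column's [min, max] range and the matching fraction of rows is kept. A worked example with the numbers from the new timestamp_comparison3.q test (5 rows, min 2015-01-01, max 2015-01-05, all UTC); the helper below is illustrative, not part of the patch:

    public class UniformRangeEstimate {
      // Fraction of a uniform [min, max] range lying at or below the bound, scaled to the row count.
      static long estimateUpperBound(long value, long min, long max, long numRows) {
        return Math.round(((double) (value - min) / (max - min)) * numRows);
      }

      public static void main(String[] args) {
        long min = 1420070400L;        // 2015-01-01 00:00:00 UTC, in seconds since epoch
        long max = min + 4 * 86400L;   // 2015-01-05 00:00:00 UTC
        long bound = min + 3 * 86400L; // 2015-01-04 00:00:00 UTC
        // (3 days / 4 days) * 5 rows = 3.75, which Math.round turns into 4
        System.out.println(estimateUpperBound(bound, min, max, 5));
      }
    }
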
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 1795ae5626..29952d19bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -853,8 +853,15 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
     } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
       cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
       cs.setNumNulls(csd.getBinaryStats().getNumNulls());
-    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) ||
-        colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
+    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
+      cs.setNumNulls(csd.getTimestampStats().getNumNulls());
+      Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? csd.getTimestampStats().getLowValue()
+          .getSecondsSinceEpoch() : null;
+      Long highVal = (csd.getTimestampStats().getHighValue() != null) ? csd.getTimestampStats().getHighValue()
+          .getSecondsSinceEpoch() : null;
+      cs.setRange(lowVal, highVal);
+    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
     } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
@@ -932,8 +939,11 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){
       cs.setNumTrues(Math.max(1, numRows/2));
       cs.setNumFalses(Math.max(1, numRows/2));
       cs.setAvgColLen(JavaDataModel.get().primitive1());
-    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) ||
-        colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
+    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+      cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
+      // default range: epoch (1970-01-01) to 2038-12-31 23:59:59, in seconds since epoch
+      cs.setRange(0, 2177452799L);
+    } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
     } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
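When a timestamp column has no statistics, the code now falls back to a fixed default range rather than leaving the range unset. A quick java.time check of what the two endpoints denote (0 is the epoch; 2177452799 is the last second of 2038):

    import java.time.Instant;
    import java.time.ZoneOffset;

    public class DefaultTimestampRange {
      public static void main(String[] args) {
        // Endpoints of cs.setRange(0, 2177452799L), interpreted as seconds since epoch
        System.out.println(Instant.ofEpochSecond(0L).atOffset(ZoneOffset.UTC));          // 1970-01-01T00:00Z
        System.out.println(Instant.ofEpochSecond(2177452799L).atOffset(ZoneOffset.UTC)); // 2038-12-31T23:59:59Z
      }
    }
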
diff --git a/ql/src/test/queries/clientpositive/timestamp_comparison3.q b/ql/src/test/queries/clientpositive/timestamp_comparison3.q
new file mode 100644
index 0000000000..145d89bba8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/timestamp_comparison3.q
@@ -0,0 +1,32 @@
+set hive.fetch.task.conversion=none;
+set hive.stats.fetch.column.stats=true;
+
+create database timestamp_test_n123;
+create table timestamp_test_n123.onecolumntable (ts timestamp);
+
+insert into timestamp_test_n123.onecolumntable values
+('2015-01-01 00:00:00'),
+('2015-01-02 00:00:00'),
+('2015-01-03 00:00:00'),
+('2015-01-04 00:00:00'),
+('2015-01-05 00:00:00');
+
+describe formatted timestamp_test_n123.onecolumntable ts;
+
+explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-02 00:00:00' as timestamp)
+  and ts <= cast('2015-01-04 00:00:00' as timestamp);
+
+explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-02 00:00:00' as timestamp)
+  and ts <= cast('2015-01-03 00:00:00' as timestamp);
+
+explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-01 00:00:00' as timestamp)
+  and ts <= cast('2015-01-08 00:00:00' as timestamp);
+
+drop table timestamp_test_n123.onecolumntable;
+drop database timestamp_test_n123;
diff --git a/ql/src/test/results/clientpositive/timestamp_comparison3.q.out b/ql/src/test/results/clientpositive/timestamp_comparison3.q.out
new file mode 100644
index 0000000000..6d01035f71
--- /dev/null
+++ b/ql/src/test/results/clientpositive/timestamp_comparison3.q.out
@@ -0,0 +1,212 @@
+PREHOOK: query: create database timestamp_test_n123
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:timestamp_test_n123
+POSTHOOK: query: create database timestamp_test_n123
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:timestamp_test_n123
+PREHOOK: query: create table timestamp_test_n123.onecolumntable (ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:timestamp_test_n123
+PREHOOK: Output: timestamp_test_n123@onecolumntable
+POSTHOOK: query: create table timestamp_test_n123.onecolumntable (ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:timestamp_test_n123
+POSTHOOK: Output: timestamp_test_n123@onecolumntable
+PREHOOK: query: insert into timestamp_test_n123.onecolumntable values
+('2015-01-01 00:00:00'),
+('2015-01-02 00:00:00'),
+('2015-01-03 00:00:00'),
+('2015-01-04 00:00:00'),
+('2015-01-05 00:00:00')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: timestamp_test_n123@onecolumntable
+POSTHOOK: query: insert into timestamp_test_n123.onecolumntable values
+('2015-01-01 00:00:00'),
+('2015-01-02 00:00:00'),
+('2015-01-03 00:00:00'),
+('2015-01-04 00:00:00'),
+('2015-01-05 00:00:00')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: timestamp_test_n123@onecolumntable
+POSTHOOK: Lineage: onecolumntable.ts SCRIPT []
+PREHOOK: query: describe formatted timestamp_test_n123.onecolumntable ts
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: timestamp_test_n123@onecolumntable
+POSTHOOK: query: describe formatted timestamp_test_n123.onecolumntable ts
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: timestamp_test_n123@onecolumntable
+col_name	ts
+data_type	timestamp
+min	2015-01-01 00:00:00
+max	2015-01-05 00:00:00
+num_nulls	0
+distinct_count	5
+avg_col_len	
+max_col_len	
+num_trues	
+num_falses	
+bitVector	HL
+comment	from deserializer
+COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}}
+PREHOOK: query: explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-02 00:00:00' as timestamp)
+  and ts <= cast('2015-01-04 00:00:00' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: timestamp_test_n123@onecolumntable
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-02 00:00:00' as timestamp)
+  and ts <= cast('2015-01-04 00:00:00' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: timestamp_test_n123@onecolumntable
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: onecolumntable
+            filterExpr: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-04 00:00:00' (type: boolean)
+            Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-04 00:00:00' (type: boolean)
+              Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: ts (type: timestamp)
+                outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-02 00:00:00' as timestamp)
+  and ts <= cast('2015-01-03 00:00:00' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: timestamp_test_n123@onecolumntable
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-02 00:00:00' as timestamp)
+  and ts <= cast('2015-01-03 00:00:00' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: timestamp_test_n123@onecolumntable
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: onecolumntable
+            filterExpr: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-03 00:00:00' (type: boolean)
+            Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-03 00:00:00' (type: boolean)
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: ts (type: timestamp)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-01 00:00:00' as timestamp)
+  and ts <= cast('2015-01-08 00:00:00' as timestamp)
+PREHOOK: type: QUERY
+PREHOOK: Input: timestamp_test_n123@onecolumntable
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select ts from timestamp_test_n123.onecolumntable
+where ts >= cast('2015-01-01 00:00:00' as timestamp)
+  and ts <= cast('2015-01-08 00:00:00' as timestamp)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: timestamp_test_n123@onecolumntable
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: onecolumntable
+            filterExpr: ts BETWEEN TIMESTAMP'2015-01-01 00:00:00' AND TIMESTAMP'2015-01-08 00:00:00' (type: boolean)
+            Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: ts BETWEEN TIMESTAMP'2015-01-01 00:00:00' AND TIMESTAMP'2015-01-08 00:00:00' (type: boolean)
+              Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: ts (type: timestamp)
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table timestamp_test_n123.onecolumntable
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: timestamp_test_n123@onecolumntable
+PREHOOK: Output: timestamp_test_n123@onecolumntable
+POSTHOOK: query: drop table timestamp_test_n123.onecolumntable
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: timestamp_test_n123@onecolumntable
+POSTHOOK: Output: timestamp_test_n123@onecolumntable
+PREHOOK: query: drop database timestamp_test_n123
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:timestamp_test_n123
+PREHOOK: Output: database:timestamp_test_n123
+POSTHOOK: query: drop database timestamp_test_n123
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:timestamp_test_n123
+POSTHOOK: Output: database:timestamp_test_n123
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java
index 9a2e4f4d56..a92a86758d 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java
@@ -43,6 +43,7 @@
   private static final org.apache.thrift.protocol.TField BINARY_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryStats", org.apache.thrift.protocol.TType.STRUCT, (short)5);
   private static final org.apache.thrift.protocol.TField DECIMAL_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("decimalStats", org.apache.thrift.protocol.TType.STRUCT, (short)6);
   private static final org.apache.thrift.protocol.TField DATE_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("dateStats", org.apache.thrift.protocol.TType.STRUCT, (short)7);
+  private static final org.apache.thrift.protocol.TField TIMESTAMP_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("timestampStats", org.apache.thrift.protocol.TType.STRUCT, (short)8);
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
@@ -52,7 +53,8 @@
     STRING_STATS((short)4, "stringStats"),
     BINARY_STATS((short)5, "binaryStats"),
     DECIMAL_STATS((short)6, "decimalStats"),
-    DATE_STATS((short)7, "dateStats");
+    DATE_STATS((short)7, "dateStats"),
+    TIMESTAMP_STATS((short)8, "timestampStats");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -81,6 +83,8 @@ public static _Fields findByThriftId(int fieldId) {
           return DECIMAL_STATS;
         case 7: // DATE_STATS
           return DATE_STATS;
+        case 8: // TIMESTAMP_STATS
+          return TIMESTAMP_STATS;
         default:
           return null;
       }
@@ -137,6 +141,8 @@ public String getFieldName() {
         new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, DecimalColumnStatsData.class)));
     tmpMap.put(_Fields.DATE_STATS, new org.apache.thrift.meta_data.FieldMetaData("dateStats", org.apache.thrift.TFieldRequirementType.DEFAULT,
         new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, DateColumnStatsData.class)));
+    tmpMap.put(_Fields.TIMESTAMP_STATS, new org.apache.thrift.meta_data.FieldMetaData("timestampStats", org.apache.thrift.TFieldRequirementType.DEFAULT,
+        new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TimestampColumnStatsData.class)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ColumnStatisticsData.class, metaDataMap);
   }
@@ -198,6 +204,12 @@ public static ColumnStatisticsData dateStats(DateColumnStatsData value) {
     return x;
   }
 
+  public static ColumnStatisticsData timestampStats(TimestampColumnStatsData value) {
+    ColumnStatisticsData x = new ColumnStatisticsData();
+    x.setTimestampStats(value);
+    return x;
+  }
+
   @Override
   protected void checkType(_Fields setField, Object value) throws ClassCastException {
@@ -237,6 +249,11 @@ protected void checkType(_Fields setField, Object value) throws ClassCastExcepti
         break;
       }
       throw new ClassCastException("Was expecting value of type DateColumnStatsData for field 'dateStats', but got " + value.getClass().getSimpleName());
+    case TIMESTAMP_STATS:
+      if (value instanceof TimestampColumnStatsData) {
+        break;
+      }
+      throw new ClassCastException("Was expecting value of type TimestampColumnStatsData for field 'timestampStats', but got " + value.getClass().getSimpleName());
     default:
       throw new IllegalArgumentException("Unknown field id " + setField);
     }
@@ -317,6 +334,16 @@ protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol ip
           org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type);
           return null;
         }
+      case TIMESTAMP_STATS:
+        if (field.type == TIMESTAMP_STATS_FIELD_DESC.type) {
+          TimestampColumnStatsData timestampStats;
+          timestampStats = new TimestampColumnStatsData();
+          timestampStats.read(iprot);
+          return timestampStats;
+        } else {
+          org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type);
+          return null;
+        }
       default:
         throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!");
     }
@@ -357,6 +384,10 @@ protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol opr
         DateColumnStatsData dateStats = (DateColumnStatsData)value_;
         dateStats.write(oprot);
         return;
+      case TIMESTAMP_STATS:
+        TimestampColumnStatsData timestampStats = (TimestampColumnStatsData)value_;
+        timestampStats.write(oprot);
+        return;
       default:
         throw new IllegalStateException("Cannot write union with unknown field " + setField_);
     }
@@ -402,6 +433,11 @@ protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot
         dateStats = new DateColumnStatsData();
         dateStats.read(iprot);
         return dateStats;
+      case TIMESTAMP_STATS:
+        TimestampColumnStatsData timestampStats;
+        timestampStats = new TimestampColumnStatsData();
+        timestampStats.read(iprot);
+        return timestampStats;
       default:
         throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!");
     }
@@ -441,6 +477,10 @@ protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot)
         DateColumnStatsData dateStats = (DateColumnStatsData)value_;
         dateStats.write(oprot);
         return;
+      case TIMESTAMP_STATS:
+        TimestampColumnStatsData timestampStats = (TimestampColumnStatsData)value_;
+        timestampStats.write(oprot);
+        return;
       default:
         throw new IllegalStateException("Cannot write union with unknown field " + setField_);
     }
@@ -463,6 +503,8 @@ protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot)
         return DECIMAL_STATS_FIELD_DESC;
       case DATE_STATS:
         return DATE_STATS_FIELD_DESC;
+      case TIMESTAMP_STATS:
+        return TIMESTAMP_STATS_FIELD_DESC;
       default:
         throw new IllegalArgumentException("Unknown field id " + setField);
     }
@@ -581,6 +623,20 @@ public void setDateStats(DateColumnStatsData value) {
     value_ = value;
   }
 
+  public TimestampColumnStatsData getTimestampStats() {
+    if (getSetField() == _Fields.TIMESTAMP_STATS) {
+      return (TimestampColumnStatsData)getFieldValue();
+    } else {
+      throw new RuntimeException("Cannot get field 'timestampStats' because union is currently set to " + getFieldDesc(getSetField()).name);
+    }
+  }
+
+  public void setTimestampStats(TimestampColumnStatsData value) {
+    if (value == null) throw new NullPointerException();
+    setField_ = _Fields.TIMESTAMP_STATS;
+    value_ = value;
+  }
+
   public boolean isSetBooleanStats() {
     return setField_ == _Fields.BOOLEAN_STATS;
   }
@@ -616,6 +672,11 @@ public boolean isSetDateStats() {
   }
 
+
+  public boolean isSetTimestampStats() {
+    return setField_ == _Fields.TIMESTAMP_STATS;
+  }
+
   public boolean equals(Object other) {
     if (other instanceof ColumnStatisticsData) {
       return equals((ColumnStatisticsData)other);
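With the union extended, a caller populates timestamp statistics the same way as the other branches: the factory method tags the union, and the isSet/get pair reads it back. A sketch using only the generated API shown in this patch (the values are illustrative):

    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    import org.apache.hadoop.hive.metastore.api.Timestamp;
    import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;

    public class TimestampStatsUnionExample {
      public static void main(String[] args) {
        TimestampColumnStatsData stats = new TimestampColumnStatsData(0L, 5L); // numNulls, numDVs
        stats.setLowValue(new Timestamp(1420070400L));  // 2015-01-01 00:00:00 UTC
        stats.setHighValue(new Timestamp(1420416000L)); // 2015-01-05 00:00:00 UTC

        ColumnStatisticsData data = ColumnStatisticsData.timestampStats(stats); // sets the union tag
        System.out.println(data.isSetTimestampStats());                         // true
        System.out.println(data.getTimestampStats().getLowValue().getSecondsSinceEpoch());
      }
    }
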
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Timestamp.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Timestamp.java
new file mode 100644
index 0000000000..b43eb1ca1b
--- /dev/null
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Timestamp.java
@@ -0,0 +1,387 @@
+/**
+ * Autogenerated by Thrift Compiler (0.9.3)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+package org.apache.hadoop.hive.metastore.api;
+
+import org.apache.thrift.scheme.IScheme;
+import org.apache.thrift.scheme.SchemeFactory;
+import org.apache.thrift.scheme.StandardScheme;
+
+import org.apache.thrift.scheme.TupleScheme;
+import org.apache.thrift.protocol.TTupleProtocol;
+import org.apache.thrift.protocol.TProtocolException;
+import org.apache.thrift.EncodingUtils;
+import org.apache.thrift.TException;
+import org.apache.thrift.async.AsyncMethodCallback;
+import org.apache.thrift.server.AbstractNonblockingServer.*;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.EnumMap;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.EnumSet;
+import java.util.Collections;
+import java.util.BitSet;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import javax.annotation.Generated;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
+@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
+@org.apache.hadoop.classification.InterfaceAudience.Public @org.apache.hadoop.classification.InterfaceStability.Stable public class Timestamp implements org.apache.thrift.TBase<Timestamp, Timestamp._Fields>, java.io.Serializable, Cloneable, Comparable<Timestamp> {
+  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("Timestamp");
+
+  private static final org.apache.thrift.protocol.TField SECONDS_SINCE_EPOCH_FIELD_DESC = new org.apache.thrift.protocol.TField("secondsSinceEpoch", org.apache.thrift.protocol.TType.I64, (short)1);
+
+  private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
+  static {
+    schemes.put(StandardScheme.class, new TimestampStandardSchemeFactory());
+    schemes.put(TupleScheme.class, new TimestampTupleSchemeFactory());
+  }
+
+  private long secondsSinceEpoch; // required
+
+  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
+  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
+    SECONDS_SINCE_EPOCH((short)1, "secondsSinceEpoch");
+
+    private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
+
+    static {
+      for (_Fields field : EnumSet.allOf(_Fields.class)) {
+        byName.put(field.getFieldName(), field);
+      }
+    }
+
+    /**
+     * Find the _Fields constant that matches fieldId, or null if its not found.
+     */
+    public static _Fields findByThriftId(int fieldId) {
+      switch(fieldId) {
+        case 1: // SECONDS_SINCE_EPOCH
+          return SECONDS_SINCE_EPOCH;
+        default:
+          return null;
+      }
+    }
+
+    /**
+     * Find the _Fields constant that matches fieldId, throwing an exception
+     * if it is not found.
+     */
+    public static _Fields findByThriftIdOrThrow(int fieldId) {
+      _Fields fields = findByThriftId(fieldId);
+      if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!");
+      return fields;
+    }
+
+    /**
+     * Find the _Fields constant that matches name, or null if its not found.
+     */
+    public static _Fields findByName(String name) {
+      return byName.get(name);
+    }
+
+    private final short _thriftId;
+    private final String _fieldName;
+
+    _Fields(short thriftId, String fieldName) {
+      _thriftId = thriftId;
+      _fieldName = fieldName;
+    }
+
+    public short getThriftFieldId() {
+      return _thriftId;
+    }
+
+    public String getFieldName() {
+      return _fieldName;
+    }
+  }
+
+  // isset id assignments
+  private static final int __SECONDSSINCEEPOCH_ISSET_ID = 0;
+  private byte __isset_bitfield = 0;
+  public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
+  static {
+    Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
+    tmpMap.put(_Fields.SECONDS_SINCE_EPOCH, new org.apache.thrift.meta_data.FieldMetaData("secondsSinceEpoch", org.apache.thrift.TFieldRequirementType.REQUIRED,
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+    metaDataMap = Collections.unmodifiableMap(tmpMap);
+    org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(Timestamp.class, metaDataMap);
+  }
+
+  public Timestamp() {
+  }
+
+  public Timestamp(
+    long secondsSinceEpoch)
+  {
+    this();
+    this.secondsSinceEpoch = secondsSinceEpoch;
+    setSecondsSinceEpochIsSet(true);
+  }
+
+  /**
+   * Performs a deep copy on <i>other</i>.
+   */
+  public Timestamp(Timestamp other) {
+    __isset_bitfield = other.__isset_bitfield;
+    this.secondsSinceEpoch = other.secondsSinceEpoch;
+  }
+
+  public Timestamp deepCopy() {
+    return new Timestamp(this);
+  }
+
+  @Override
+  public void clear() {
+    setSecondsSinceEpochIsSet(false);
+    this.secondsSinceEpoch = 0;
+  }
+
+  public long getSecondsSinceEpoch() {
+    return this.secondsSinceEpoch;
+  }
+
+  public void setSecondsSinceEpoch(long secondsSinceEpoch) {
+    this.secondsSinceEpoch = secondsSinceEpoch;
+    setSecondsSinceEpochIsSet(true);
+  }
+
+  public void unsetSecondsSinceEpoch() {
+    __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __SECONDSSINCEEPOCH_ISSET_ID);
+  }
+
+  /** Returns true if field secondsSinceEpoch is set (has been assigned a value) and false otherwise */
+  public boolean isSetSecondsSinceEpoch() {
+    return EncodingUtils.testBit(__isset_bitfield, __SECONDSSINCEEPOCH_ISSET_ID);
+  }
+
+  public void setSecondsSinceEpochIsSet(boolean value) {
+    __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __SECONDSSINCEEPOCH_ISSET_ID, value);
+  }
+
+  public void setFieldValue(_Fields field, Object value) {
+    switch (field) {
+    case SECONDS_SINCE_EPOCH:
+      if (value == null) {
+        unsetSecondsSinceEpoch();
+      } else {
+        setSecondsSinceEpoch((Long)value);
+      }
+      break;
+
+    }
+  }
+
+  public Object getFieldValue(_Fields field) {
+    switch (field) {
+    case SECONDS_SINCE_EPOCH:
+      return getSecondsSinceEpoch();
+
+    }
+    throw new IllegalStateException();
+  }
+
+  /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */
+  public boolean isSet(_Fields field) {
+    if (field == null) {
+      throw new IllegalArgumentException();
+    }
+
+    switch (field) {
+    case SECONDS_SINCE_EPOCH:
+      return isSetSecondsSinceEpoch();
+    }
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public boolean equals(Object that) {
+    if (that == null)
+      return false;
+    if (that instanceof Timestamp)
+      return this.equals((Timestamp)that);
+    return false;
+  }
+
+  public boolean equals(Timestamp that) {
+    if (that == null)
+      return false;
+
+    boolean this_present_secondsSinceEpoch = true;
+    boolean that_present_secondsSinceEpoch = true;
+    if (this_present_secondsSinceEpoch || that_present_secondsSinceEpoch) {
+      if (!(this_present_secondsSinceEpoch && that_present_secondsSinceEpoch))
+        return false;
+      if (this.secondsSinceEpoch != that.secondsSinceEpoch)
+        return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    List<Object> list = new ArrayList<Object>();
+
+    boolean present_secondsSinceEpoch = true;
+    list.add(present_secondsSinceEpoch);
+    if (present_secondsSinceEpoch)
+      list.add(secondsSinceEpoch);
+
+    return list.hashCode();
+  }
+
+  @Override
+  public int compareTo(Timestamp other) {
+    if (!getClass().equals(other.getClass())) {
+      return getClass().getName().compareTo(other.getClass().getName());
+    }
+
+    int lastComparison = 0;
+
+    lastComparison = Boolean.valueOf(isSetSecondsSinceEpoch()).compareTo(other.isSetSecondsSinceEpoch());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetSecondsSinceEpoch()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.secondsSinceEpoch, other.secondsSinceEpoch);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
+    return 0;
+  }
+
+  public _Fields fieldForId(int fieldId) {
+    return _Fields.findByThriftId(fieldId);
+  }
+
+  public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException {
+    schemes.get(iprot.getScheme()).getScheme().read(iprot, this);
+  }
+
+  public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException {
+    schemes.get(oprot.getScheme()).getScheme().write(oprot, this);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("Timestamp(");
+    boolean first = true;
+
+    sb.append("secondsSinceEpoch:");
+    sb.append(this.secondsSinceEpoch);
+    first = false;
+    sb.append(")");
+    return sb.toString();
+  }
+
+  public void validate() throws org.apache.thrift.TException {
+    // check for required fields
+    if (!isSetSecondsSinceEpoch()) {
+      throw new org.apache.thrift.protocol.TProtocolException("Required field 'secondsSinceEpoch' is unset! Struct:" + toString());
+    }
+
+    // check for sub-struct validity
+  }
+
+  private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException {
+    try {
+      write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out)));
+    } catch (org.apache.thrift.TException te) {
+      throw new java.io.IOException(te);
+    }
+  }
+
+  private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
+    try {
+      // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor.
+      __isset_bitfield = 0;
+      read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
+    } catch (org.apache.thrift.TException te) {
+      throw new java.io.IOException(te);
+    }
+  }
+
+  private static class TimestampStandardSchemeFactory implements SchemeFactory {
+    public TimestampStandardScheme getScheme() {
+      return new TimestampStandardScheme();
+    }
+  }
+
+  private static class TimestampStandardScheme extends StandardScheme<Timestamp> {
+
+    public void read(org.apache.thrift.protocol.TProtocol iprot, Timestamp struct) throws org.apache.thrift.TException {
+      org.apache.thrift.protocol.TField schemeField;
+      iprot.readStructBegin();
+      while (true)
+      {
+        schemeField = iprot.readFieldBegin();
+        if (schemeField.type == org.apache.thrift.protocol.TType.STOP) {
+          break;
+        }
+        switch (schemeField.id) {
+          case 1: // SECONDS_SINCE_EPOCH
+            if (schemeField.type == org.apache.thrift.protocol.TType.I64) {
+              struct.secondsSinceEpoch = iprot.readI64();
+              struct.setSecondsSinceEpochIsSet(true);
+            } else {
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
+          default:
+            org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+        }
+        iprot.readFieldEnd();
+      }
+      iprot.readStructEnd();
+      struct.validate();
+    }
+
+    public void write(org.apache.thrift.protocol.TProtocol oprot, Timestamp struct) throws org.apache.thrift.TException {
+      struct.validate();
+
+      oprot.writeStructBegin(STRUCT_DESC);
+      oprot.writeFieldBegin(SECONDS_SINCE_EPOCH_FIELD_DESC);
+      oprot.writeI64(struct.secondsSinceEpoch);
+      oprot.writeFieldEnd();
+      oprot.writeFieldStop();
+      oprot.writeStructEnd();
+    }
+
+  }
+
+  private static class TimestampTupleSchemeFactory implements SchemeFactory {
+    public TimestampTupleScheme getScheme() {
+      return new TimestampTupleScheme();
+    }
+  }
+
+  private static class TimestampTupleScheme extends TupleScheme<Timestamp> {
+
+    @Override
+    public void write(org.apache.thrift.protocol.TProtocol prot, Timestamp struct) throws org.apache.thrift.TException {
+      TTupleProtocol oprot = (TTupleProtocol) prot;
+      oprot.writeI64(struct.secondsSinceEpoch);
+    }
+
+    @Override
+    public void read(org.apache.thrift.protocol.TProtocol prot, Timestamp struct) throws org.apache.thrift.TException {
+      TTupleProtocol iprot = (TTupleProtocol) prot;
+      struct.secondsSinceEpoch = iprot.readI64();
+      struct.setSecondsSinceEpochIsSet(true);
+    }
+  }
+
+}
+
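The new metastore Timestamp struct is deliberately small: one required i64, secondsSinceEpoch. Because the field is required, validate() rejects an instance where it was never assigned, which matters when constructing stats objects by hand. A short sketch of that contract, based on the generated code above:

    import org.apache.hadoop.hive.metastore.api.Timestamp;

    public class TimestampStructContract {
      public static void main(String[] args) throws org.apache.thrift.TException {
        Timestamp set = new Timestamp(1420070400L);
        set.validate();                                     // passes: the constructor marked the field as set
        System.out.println(set.getSecondsSinceEpoch());     // 1420070400

        Timestamp unset = new Timestamp();
        System.out.println(unset.isSetSecondsSinceEpoch()); // false
        try {
          unset.validate();                                 // throws TProtocolException
        } catch (org.apache.thrift.TException e) {
          System.out.println("rejected: " + e.getMessage());
        }
      }
    }
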
optional + private long numNulls; // required + private long numDVs; // required + private ByteBuffer bitVectors; // optional + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + LOW_VALUE((short)1, "lowValue"), + HIGH_VALUE((short)2, "highValue"), + NUM_NULLS((short)3, "numNulls"), + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + case 1: // LOW_VALUE + return LOW_VALUE; + case 2: // HIGH_VALUE + return HIGH_VALUE; + case 3: // NUM_NULLS + return NUM_NULLS; + case 4: // NUM_DVS + return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + private static final int __NUMNULLS_ISSET_ID = 0; + private static final int __NUMDVS_ISSET_ID = 1; + private byte __isset_bitfield = 0; + private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS}; + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Timestamp.class))); + tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Timestamp.class))); + tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new 
org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TimestampColumnStatsData.class, metaDataMap); + } + + public TimestampColumnStatsData() { + } + + public TimestampColumnStatsData( + long numNulls, + long numDVs) + { + this(); + this.numNulls = numNulls; + setNumNullsIsSet(true); + this.numDVs = numDVs; + setNumDVsIsSet(true); + } + + /** + * Performs a deep copy on other. + */ + public TimestampColumnStatsData(TimestampColumnStatsData other) { + __isset_bitfield = other.__isset_bitfield; + if (other.isSetLowValue()) { + this.lowValue = new Timestamp(other.lowValue); + } + if (other.isSetHighValue()) { + this.highValue = new Timestamp(other.highValue); + } + this.numNulls = other.numNulls; + this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); + } + } + + public TimestampColumnStatsData deepCopy() { + return new TimestampColumnStatsData(this); + } + + @Override + public void clear() { + this.lowValue = null; + this.highValue = null; + setNumNullsIsSet(false); + this.numNulls = 0; + setNumDVsIsSet(false); + this.numDVs = 0; + this.bitVectors = null; + } + + public Timestamp getLowValue() { + return this.lowValue; + } + + public void setLowValue(Timestamp lowValue) { + this.lowValue = lowValue; + } + + public void unsetLowValue() { + this.lowValue = null; + } + + /** Returns true if field lowValue is set (has been assigned a value) and false otherwise */ + public boolean isSetLowValue() { + return this.lowValue != null; + } + + public void setLowValueIsSet(boolean value) { + if (!value) { + this.lowValue = null; + } + } + + public Timestamp getHighValue() { + return this.highValue; + } + + public void setHighValue(Timestamp highValue) { + this.highValue = highValue; + } + + public void unsetHighValue() { + this.highValue = null; + } + + /** Returns true if field highValue is set (has been assigned a value) and false otherwise */ + public boolean isSetHighValue() { + return this.highValue != null; + } + + public void setHighValueIsSet(boolean value) { + if (!value) { + this.highValue = null; + } + } + + public long getNumNulls() { + return this.numNulls; + } + + public void setNumNulls(long numNulls) { + this.numNulls = numNulls; + setNumNullsIsSet(true); + } + + public void unsetNumNulls() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMNULLS_ISSET_ID); + } + + /** Returns true if field numNulls is set (has been assigned a value) and false otherwise */ + public boolean isSetNumNulls() { + return EncodingUtils.testBit(__isset_bitfield, __NUMNULLS_ISSET_ID); + } + + public void setNumNullsIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value); + } + + public long getNumDVs() { + return this.numDVs; + } + + public void setNumDVs(long numDVs) { + this.numDVs = numDVs; + setNumDVsIsSet(true); + } + + public void unsetNumDVs() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMDVS_ISSET_ID); + } + + /** Returns true if field numDVs is set (has been assigned a value) and false otherwise */ + public boolean isSetNumDVs() { + return EncodingUtils.testBit(__isset_bitfield, __NUMDVS_ISSET_ID); + } + + public void setNumDVsIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); + } + + public byte[] 
getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return bitVectors == null ? null : bitVectors.array(); + } + + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case LOW_VALUE: + if (value == null) { + unsetLowValue(); + } else { + setLowValue((Timestamp)value); + } + break; + + case HIGH_VALUE: + if (value == null) { + unsetHighValue(); + } else { + setHighValue((Timestamp)value); + } + break; + + case NUM_NULLS: + if (value == null) { + unsetNumNulls(); + } else { + setNumNulls((Long)value); + } + break; + + case NUM_DVS: + if (value == null) { + unsetNumDVs(); + } else { + setNumDVs((Long)value); + } + break; + + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((ByteBuffer)value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case LOW_VALUE: + return getLowValue(); + + case HIGH_VALUE: + return getHighValue(); + + case NUM_NULLS: + return getNumNulls(); + + case NUM_DVS: + return getNumDVs(); + + case BIT_VECTORS: + return getBitVectors(); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case LOW_VALUE: + return isSetLowValue(); + case HIGH_VALUE: + return isSetHighValue(); + case NUM_NULLS: + return isSetNumNulls(); + case NUM_DVS: + return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof TimestampColumnStatsData) + return this.equals((TimestampColumnStatsData)that); + return false; + } + + public boolean equals(TimestampColumnStatsData that) { + if (that == null) + return false; + + boolean this_present_lowValue = true && this.isSetLowValue(); + boolean that_present_lowValue = true && that.isSetLowValue(); + if (this_present_lowValue || that_present_lowValue) { + if (!(this_present_lowValue && that_present_lowValue)) + return false; + if (!this.lowValue.equals(that.lowValue)) + return false; + } + + boolean this_present_highValue = true && this.isSetHighValue(); + boolean that_present_highValue = true && that.isSetHighValue(); + if (this_present_highValue || that_present_highValue) { + if (!(this_present_highValue && that_present_highValue)) + return false; + if (!this.highValue.equals(that.highValue)) + return false; + } + + boolean this_present_numNulls = true; + boolean that_present_numNulls = true; + if (this_present_numNulls || that_present_numNulls) { + if 
(!(this_present_numNulls && that_present_numNulls)) + return false; + if (this.numNulls != that.numNulls) + return false; + } + + boolean this_present_numDVs = true; + boolean that_present_numDVs = true; + if (this_present_numDVs || that_present_numDVs) { + if (!(this_present_numDVs && that_present_numDVs)) + return false; + if (this.numDVs != that.numDVs) + return false; + } + + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + List<Object> list = new ArrayList<Object>(); + + boolean present_lowValue = true && (isSetLowValue()); + list.add(present_lowValue); + if (present_lowValue) + list.add(lowValue); + + boolean present_highValue = true && (isSetHighValue()); + list.add(present_highValue); + if (present_highValue) + list.add(highValue); + + boolean present_numNulls = true; + list.add(present_numNulls); + if (present_numNulls) + list.add(numNulls); + + boolean present_numDVs = true; + list.add(present_numDVs); + if (present_numDVs) + list.add(numDVs); + + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + + return list.hashCode(); + } + + @Override + public int compareTo(TimestampColumnStatsData other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + + lastComparison = Boolean.valueOf(isSetLowValue()).compareTo(other.isSetLowValue()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetLowValue()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.lowValue, other.lowValue); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetHighValue()).compareTo(other.isSetHighValue()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetHighValue()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.highValue, other.highValue); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetNumNulls()).compareTo(other.isSetNumNulls()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumNulls()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.numNulls, other.numNulls); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetNumDVs()).compareTo(other.isSetNumDVs()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumDVs()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.numDVs, other.numDVs); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + 
schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("TimestampColumnStatsData("); + boolean first = true; + + if (isSetLowValue()) { + sb.append("lowValue:"); + if (this.lowValue == null) { + sb.append("null"); + } else { + sb.append(this.lowValue); + } + first = false; + } + if (isSetHighValue()) { + if (!first) sb.append(", "); + sb.append("highValue:"); + if (this.highValue == null) { + sb.append("null"); + } else { + sb.append(this.highValue); + } + first = false; + } + if (!first) sb.append(", "); + sb.append("numNulls:"); + sb.append(this.numNulls); + first = false; + if (!first) sb.append(", "); + sb.append("numDVs:"); + sb.append(this.numDVs); + first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); + } + first = false; + } + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + if (!isSetNumNulls()) { + throw new org.apache.thrift.protocol.TProtocolException("Required field 'numNulls' is unset! Struct:" + toString()); + } + + if (!isSetNumDVs()) { + throw new org.apache.thrift.protocol.TProtocolException("Required field 'numDVs' is unset! Struct:" + toString()); + } + + // check for sub-struct validity + if (lowValue != null) { + lowValue.validate(); + } + if (highValue != null) { + highValue.validate(); + } + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
+ __isset_bitfield = 0; + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class TimestampColumnStatsDataStandardSchemeFactory implements SchemeFactory { + public TimestampColumnStatsDataStandardScheme getScheme() { + return new TimestampColumnStatsDataStandardScheme(); + } + } + + private static class TimestampColumnStatsDataStandardScheme extends StandardScheme<TimestampColumnStatsData> { + + public void read(org.apache.thrift.protocol.TProtocol iprot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + case 1: // LOW_VALUE + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.lowValue = new Timestamp(); + struct.lowValue.read(iprot); + struct.setLowValueIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 2: // HIGH_VALUE + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.highValue = new Timestamp(); + struct.highValue.read(iprot); + struct.setHighValueIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 3: // NUM_NULLS + if (schemeField.type == org.apache.thrift.protocol.TType.I64) { + struct.numNulls = iprot.readI64(); + struct.setNumNullsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 4: // NUM_DVS + if (schemeField.type == org.apache.thrift.protocol.TType.I64) { + struct.numDVs = iprot.readI64(); + struct.setNumDVsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 5: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readBinary(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (struct.lowValue != null) { + if (struct.isSetLowValue()) { + oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); + struct.lowValue.write(oprot); + oprot.writeFieldEnd(); + } + } + if (struct.highValue != null) { + if (struct.isSetHighValue()) { + oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); + struct.highValue.write(oprot); + oprot.writeFieldEnd(); + } + } + oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); + oprot.writeI64(struct.numNulls); + oprot.writeFieldEnd(); + oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); + oprot.writeI64(struct.numDVs); + oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeBinary(struct.bitVectors); + oprot.writeFieldEnd(); + } + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class TimestampColumnStatsDataTupleSchemeFactory implements 
SchemeFactory { + public TimestampColumnStatsDataTupleScheme getScheme() { + return new TimestampColumnStatsDataTupleScheme(); + } + } + + private static class TimestampColumnStatsDataTupleScheme extends TupleScheme<TimestampColumnStatsData> { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + oprot.writeI64(struct.numNulls); + oprot.writeI64(struct.numDVs); + BitSet optionals = new BitSet(); + if (struct.isSetLowValue()) { + optionals.set(0); + } + if (struct.isSetHighValue()) { + optionals.set(1); + } + if (struct.isSetBitVectors()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); + if (struct.isSetLowValue()) { + struct.lowValue.write(oprot); + } + if (struct.isSetHighValue()) { + struct.highValue.write(oprot); + } + if (struct.isSetBitVectors()) { + oprot.writeBinary(struct.bitVectors); + } + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + struct.numNulls = iprot.readI64(); + struct.setNumNullsIsSet(true); + struct.numDVs = iprot.readI64(); + struct.setNumDVsIsSet(true); + BitSet incoming = iprot.readBitSet(3); + if (incoming.get(0)) { + struct.lowValue = new Timestamp(); + struct.lowValue.read(iprot); + struct.setLowValueIsSet(true); + } + if (incoming.get(1)) { + struct.highValue = new Timestamp(); + struct.highValue.read(iprot); + struct.setHighValueIsSet(true); + } + if (incoming.get(2)) { + struct.bitVectors = iprot.readBinary(); + struct.setBitVectorsIsSet(true); + } + } + } + +} + diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php index c4add01d09..ee6b48d915 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php @@ -9856,6 +9856,258 @@ class DateColumnStatsData { } +class Timestamp { + static $_TSPEC; + + /** + * @var int + */ + public $secondsSinceEpoch = null; + + public function __construct($vals=null) { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + 1 => array( + 'var' => 'secondsSinceEpoch', + 'type' => TType::I64, + ), + ); + } + if (is_array($vals)) { + if (isset($vals['secondsSinceEpoch'])) { + $this->secondsSinceEpoch = $vals['secondsSinceEpoch']; + } + } + } + + public function getName() { + return 'Timestamp'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + case 1: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->secondsSinceEpoch); + } else { + $xfer += $input->skip($ftype); + } + break; + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('Timestamp'); + if ($this->secondsSinceEpoch !== null) { + $xfer += $output->writeFieldBegin('secondsSinceEpoch', TType::I64, 1); + $xfer += $output->writeI64($this->secondsSinceEpoch); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + 
$xfer += $output->writeStructEnd(); + return $xfer; + } + +} + +class TimestampColumnStatsData { + static $_TSPEC; + + /** + * @var \metastore\Timestamp + */ + public $lowValue = null; + /** + * @var \metastore\Timestamp + */ + public $highValue = null; + /** + * @var int + */ + public $numNulls = null; + /** + * @var int + */ + public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; + + public function __construct($vals=null) { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + 1 => array( + 'var' => 'lowValue', + 'type' => TType::STRUCT, + 'class' => '\metastore\Timestamp', + ), + 2 => array( + 'var' => 'highValue', + 'type' => TType::STRUCT, + 'class' => '\metastore\Timestamp', + ), + 3 => array( + 'var' => 'numNulls', + 'type' => TType::I64, + ), + 4 => array( + 'var' => 'numDVs', + 'type' => TType::I64, + ), + 5 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), + ); + } + if (is_array($vals)) { + if (isset($vals['lowValue'])) { + $this->lowValue = $vals['lowValue']; + } + if (isset($vals['highValue'])) { + $this->highValue = $vals['highValue']; + } + if (isset($vals['numNulls'])) { + $this->numNulls = $vals['numNulls']; + } + if (isset($vals['numDVs'])) { + $this->numDVs = $vals['numDVs']; + } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } + } + } + + public function getName() { + return 'TimestampColumnStatsData'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + case 1: + if ($ftype == TType::STRUCT) { + $this->lowValue = new \metastore\Timestamp(); + $xfer += $this->lowValue->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->highValue = new \metastore\Timestamp(); + $xfer += $this->highValue->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + case 3: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->numNulls); + } else { + $xfer += $input->skip($ftype); + } + break; + case 4: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->numDVs); + } else { + $xfer += $input->skip($ftype); + } + break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('TimestampColumnStatsData'); + if ($this->lowValue !== null) { + if (!is_object($this->lowValue)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('lowValue', TType::STRUCT, 1); + $xfer += $this->lowValue->write($output); + $xfer += $output->writeFieldEnd(); + } + if ($this->highValue !== null) { + if (!is_object($this->highValue)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('highValue', TType::STRUCT, 2); + $xfer += $this->highValue->write($output); + $xfer += $output->writeFieldEnd(); + } + if ($this->numNulls !== null) { + $xfer += $output->writeFieldBegin('numNulls', TType::I64, 3); + $xfer += 
$output->writeI64($this->numNulls); + $xfer += $output->writeFieldEnd(); + } + if ($this->numDVs !== null) { + $xfer += $output->writeFieldBegin('numDVs', TType::I64, 4); + $xfer += $output->writeI64($this->numDVs); + $xfer += $output->writeFieldEnd(); + } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + class ColumnStatisticsData { static $_TSPEC; @@ -9887,6 +10139,10 @@ class ColumnStatisticsData { * @var \metastore\DateColumnStatsData */ public $dateStats = null; + /** + * @var \metastore\TimestampColumnStatsData + */ + public $timestampStats = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -9926,6 +10182,11 @@ class ColumnStatisticsData { 'type' => TType::STRUCT, 'class' => '\metastore\DateColumnStatsData', ), + 8 => array( + 'var' => 'timestampStats', + 'type' => TType::STRUCT, + 'class' => '\metastore\TimestampColumnStatsData', + ), ); } if (is_array($vals)) { @@ -9950,6 +10211,9 @@ class ColumnStatisticsData { if (isset($vals['dateStats'])) { $this->dateStats = $vals['dateStats']; } + if (isset($vals['timestampStats'])) { + $this->timestampStats = $vals['timestampStats']; + } } } @@ -10028,6 +10292,14 @@ class ColumnStatisticsData { $xfer += $input->skip($ftype); } break; + case 8: + if ($ftype == TType::STRUCT) { + $this->timestampStats = new \metastore\TimestampColumnStatsData(); + $xfer += $this->timestampStats->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -10097,6 +10369,14 @@ class ColumnStatisticsData { $xfer += $this->dateStats->write($output); $xfer += $output->writeFieldEnd(); } + if ($this->timestampStats !== null) { + if (!is_object($this->timestampStats)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('timestampStats', TType::STRUCT, 8); + $xfer += $this->timestampStats->write($output); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 509d5cfa52..353005be5f 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -6795,6 +6795,196 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) +class Timestamp: + """ + Attributes: + - secondsSinceEpoch + """ + + thrift_spec = ( + None, # 0 + (1, TType.I64, 'secondsSinceEpoch', None, None, ), # 1 + ) + + def __init__(self, secondsSinceEpoch=None,): + self.secondsSinceEpoch = secondsSinceEpoch + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.I64: + 
self.secondsSinceEpoch = iprot.readI64() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('Timestamp') + if self.secondsSinceEpoch is not None: + oprot.writeFieldBegin('secondsSinceEpoch', TType.I64, 1) + oprot.writeI64(self.secondsSinceEpoch) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.secondsSinceEpoch is None: + raise TProtocol.TProtocolException(message='Required field secondsSinceEpoch is unset!') + return + + + def __hash__(self): + value = 17 + value = (value * 31) ^ hash(self.secondsSinceEpoch) + return value + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + +class TimestampColumnStatsData: + """ + Attributes: + - lowValue + - highValue + - numNulls + - numDVs + - bitVectors + """ + + thrift_spec = ( + None, # 0 + (1, TType.STRUCT, 'lowValue', (Timestamp, Timestamp.thrift_spec), None, ), # 1 + (2, TType.STRUCT, 'highValue', (Timestamp, Timestamp.thrift_spec), None, ), # 2 + (3, TType.I64, 'numNulls', None, None, ), # 3 + (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 + ) + + def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,): + self.lowValue = lowValue + self.highValue = highValue + self.numNulls = numNulls + self.numDVs = numDVs + self.bitVectors = bitVectors + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRUCT: + self.lowValue = Timestamp() + self.lowValue.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.highValue = Timestamp() + self.highValue.read(iprot) + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I64: + self.numNulls = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.I64: + self.numDVs = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('TimestampColumnStatsData') + if self.lowValue is not None: + oprot.writeFieldBegin('lowValue', TType.STRUCT, 1) + self.lowValue.write(oprot) + oprot.writeFieldEnd() + if self.highValue is not None: + oprot.writeFieldBegin('highValue', 
TType.STRUCT, 2) + self.highValue.write(oprot) + oprot.writeFieldEnd() + if self.numNulls is not None: + oprot.writeFieldBegin('numNulls', TType.I64, 3) + oprot.writeI64(self.numNulls) + oprot.writeFieldEnd() + if self.numDVs is not None: + oprot.writeFieldBegin('numDVs', TType.I64, 4) + oprot.writeI64(self.numDVs) + oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.numNulls is None: + raise TProtocol.TProtocolException(message='Required field numNulls is unset!') + if self.numDVs is None: + raise TProtocol.TProtocolException(message='Required field numDVs is unset!') + return + + + def __hash__(self): + value = 17 + value = (value * 31) ^ hash(self.lowValue) + value = (value * 31) ^ hash(self.highValue) + value = (value * 31) ^ hash(self.numNulls) + value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) + return value + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + class ColumnStatisticsData: """ Attributes: @@ -6805,6 +6995,7 @@ class ColumnStatisticsData: - binaryStats - decimalStats - dateStats + - timestampStats """ thrift_spec = ( @@ -6816,9 +7007,10 @@ class ColumnStatisticsData: (5, TType.STRUCT, 'binaryStats', (BinaryColumnStatsData, BinaryColumnStatsData.thrift_spec), None, ), # 5 (6, TType.STRUCT, 'decimalStats', (DecimalColumnStatsData, DecimalColumnStatsData.thrift_spec), None, ), # 6 (7, TType.STRUCT, 'dateStats', (DateColumnStatsData, DateColumnStatsData.thrift_spec), None, ), # 7 + (8, TType.STRUCT, 'timestampStats', (TimestampColumnStatsData, TimestampColumnStatsData.thrift_spec), None, ), # 8 ) - def __init__(self, booleanStats=None, longStats=None, doubleStats=None, stringStats=None, binaryStats=None, decimalStats=None, dateStats=None,): + def __init__(self, booleanStats=None, longStats=None, doubleStats=None, stringStats=None, binaryStats=None, decimalStats=None, dateStats=None, timestampStats=None,): self.booleanStats = booleanStats self.longStats = longStats self.doubleStats = doubleStats @@ -6826,6 +7018,7 @@ def __init__(self, booleanStats=None, longStats=None, doubleStats=None, stringSt self.binaryStats = binaryStats self.decimalStats = decimalStats self.dateStats = dateStats + self.timestampStats = timestampStats def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -6878,6 +7071,12 @@ def read(self, iprot): self.dateStats.read(iprot) else: iprot.skip(ftype) + elif fid == 8: + if ftype == TType.STRUCT: + self.timestampStats = TimestampColumnStatsData() + self.timestampStats.read(iprot) + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -6916,6 +7115,10 @@ def write(self, oprot): oprot.writeFieldBegin('dateStats', TType.STRUCT, 7) self.dateStats.write(oprot) oprot.writeFieldEnd() + if self.timestampStats is not None: + oprot.writeFieldBegin('timestampStats', TType.STRUCT, 8) + self.timestampStats.write(oprot) + oprot.writeFieldEnd() oprot.writeFieldStop() 
oprot.writeStructEnd() @@ -6932,6 +7135,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.binaryStats) value = (value * 31) ^ hash(self.decimalStats) value = (value * 31) ^ hash(self.dateStats) + value = (value * 31) ^ hash(self.timestampStats) return value def __repr__(self): diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb index 2cfc19e715..d7fbcc61ae 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -1510,6 +1510,49 @@ class DateColumnStatsData ::Thrift::Struct.generate_accessors self end +class Timestamp + include ::Thrift::Struct, ::Thrift::Struct_Union + SECONDSSINCEEPOCH = 1 + + FIELDS = { + SECONDSSINCEEPOCH => {:type => ::Thrift::Types::I64, :name => 'secondsSinceEpoch'} + } + + def struct_fields; FIELDS; end + + def validate + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field secondsSinceEpoch is unset!') unless @secondsSinceEpoch + end + + ::Thrift::Struct.generate_accessors self +end + +class TimestampColumnStatsData + include ::Thrift::Struct, ::Thrift::Struct_Union + LOWVALUE = 1 + HIGHVALUE = 2 + NUMNULLS = 3 + NUMDVS = 4 + BITVECTORS = 5 + + FIELDS = { + LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Timestamp, :optional => true}, + HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Timestamp, :optional => true}, + NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} + } + + def struct_fields; FIELDS; end + + def validate + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs + end + + ::Thrift::Struct.generate_accessors self +end + class ColumnStatisticsData < ::Thrift::Union include ::Thrift::Struct_Union class << self @@ -1540,6 +1583,10 @@ class ColumnStatisticsData < ::Thrift::Union def dateStats(val) ColumnStatisticsData.new(:dateStats, val) end + + def timestampStats(val) + ColumnStatisticsData.new(:timestampStats, val) + end end BOOLEANSTATS = 1 @@ -1549,6 +1596,7 @@ class ColumnStatisticsData < ::Thrift::Union BINARYSTATS = 5 DECIMALSTATS = 6 DATESTATS = 7 + TIMESTAMPSTATS = 8 FIELDS = { BOOLEANSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'booleanStats', :class => ::BooleanColumnStatsData}, @@ -1557,7 +1605,8 @@ class ColumnStatisticsData < ::Thrift::Union STRINGSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'stringStats', :class => ::StringColumnStatsData}, BINARYSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'binaryStats', :class => ::BinaryColumnStatsData}, DECIMALSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'decimalStats', :class => ::DecimalColumnStatsData}, - DATESTATS => {:type => ::Thrift::Types::STRUCT, :name => 'dateStats', :class => ::DateColumnStatsData} + DATESTATS => {:type => ::Thrift::Types::STRUCT, :name => 'dateStats', :class => ::DateColumnStatsData}, + TIMESTAMPSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'timestampStats', :class => 
::TimestampColumnStatsData} } def struct_fields; FIELDS; end diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 1aa88877d4..927e14d64e 100644 --- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -562,6 +562,18 @@ struct DateColumnStatsData { 5: optional binary bitVectors } +struct Timestamp { +1: required i64 secondsSinceEpoch +} + +struct TimestampColumnStatsData { +1: optional Timestamp lowValue, +2: optional Timestamp highValue, +3: required i64 numNulls, +4: required i64 numDVs, +5: optional binary bitVectors +} + union ColumnStatisticsData { 1: BooleanColumnStatsData booleanStats, 2: LongColumnStatsData longStats, @@ -569,7 +581,8 @@ union ColumnStatisticsData { 4: StringColumnStatsData stringStats, 5: BinaryColumnStatsData binaryStats, 6: DecimalColumnStatsData decimalStats, -7: DateColumnStatsData dateStats +7: DateColumnStatsData dateStats, +8: TimestampColumnStatsData timestampStats } struct ColumnStatisticsObj { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index e8d197abb4..bc177e7347 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -37,12 +37,15 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Timestamp; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.model.MPartition; import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; import org.apache.hadoop.hive.metastore.model.MTable; @@ -126,6 +129,14 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null); + } else if (statsObj.getStatsData().isSetTimestampStats()) { + TimestampColumnStatsData timestampStats = statsObj.getStatsData().getTimestampStats(); + mColStats.setTimestampStats( + timestampStats.isSetNumNulls() ? timestampStats.getNumNulls() : null, + timestampStats.isSetNumDVs() ? timestampStats.getNumDVs() : null, + timestampStats.isSetBitVectors() ? 
timestampStats.getBitVectors() : null, + timestampStats.isSetLowValue() ? timestampStats.getLowValue().getSecondsSinceEpoch() : null, + timestampStats.isSetHighValue() ? timestampStats.getHighValue().getSecondsSinceEpoch() : null); } mColStats.setEngine(engine); return mColStats; @@ -251,8 +262,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( binaryStats.setMaxColLen(mStatsObj.getMaxColLen()); colStatsData.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || - colType.equals("smallint") || colType.equals("tinyint") || - colType.equals("timestamp")) { + colType.equals("smallint") || colType.equals("tinyint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(mStatsObj.getNumNulls()); Long longHighValue = mStatsObj.getLongHighValue(); @@ -308,6 +318,20 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( dateStats.setNumDVs(mStatsObj.getNumDVs()); dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(mStatsObj.getNumNulls()); + Long highValue = mStatsObj.getLongHighValue(); + if (highValue != null) { + timestampStats.setHighValue(new Timestamp(highValue)); + } + Long lowValue = mStatsObj.getLongLowValue(); + if (lowValue != null) { + timestampStats.setLowValue(new Timestamp(lowValue)); + } + timestampStats.setNumDVs(mStatsObj.getNumDVs()); + timestampStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); + colStatsData.setTimestampStats(timestampStats); } statsObj.setStatsData(colStatsData); return statsObj; @@ -394,6 +418,14 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null); + } else if (statsObj.getStatsData().isSetTimestampStats()) { + TimestampColumnStatsData timestampStats = statsObj.getStatsData().getTimestampStats(); + mColStats.setTimestampStats( + timestampStats.isSetNumNulls() ? timestampStats.getNumNulls() : null, + timestampStats.isSetNumDVs() ? timestampStats.getNumDVs() : null, + timestampStats.isSetBitVectors() ? timestampStats.getBitVectors() : null, + timestampStats.isSetLowValue() ? timestampStats.getLowValue().getSecondsSinceEpoch() : null, + timestampStats.isSetHighValue() ? 
timestampStats.getHighValue().getSecondsSinceEpoch() : null); } mColStats.setEngine(engine); return mColStats; @@ -429,8 +461,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( binaryStats.setMaxColLen(mStatsObj.getMaxColLen()); colStatsData.setBinaryStats(binaryStats); } else if (colType.equals("tinyint") || colType.equals("smallint") || - colType.equals("int") || colType.equals("bigint") || - colType.equals("timestamp")) { + colType.equals("int") || colType.equals("bigint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(mStatsObj.getNumNulls()); if (mStatsObj.getLongHighValue() != null) { @@ -480,6 +511,20 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( dateStats.setNumDVs(mStatsObj.getNumDVs()); dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(mStatsObj.getNumNulls()); + Long highValue = mStatsObj.getLongHighValue(); + if (highValue != null) { + timestampStats.setHighValue(new Timestamp(highValue)); + } + Long lowValue = mStatsObj.getLongLowValue(); + if (lowValue != null) { + timestampStats.setLowValue(new Timestamp(lowValue)); + } + timestampStats.setNumDVs(mStatsObj.getNumDVs()); + timestampStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); + colStatsData.setTimestampStats(timestampStats); } statsObj.setStatsData(colStatsData); return statsObj; @@ -524,8 +569,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen)); data.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || - colType.equals("smallint") || colType.equals("tinyint") || - colType.equals("timestamp")) { + colType.equals("smallint") || colType.equals("tinyint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); if (lhigh != null) { @@ -573,6 +617,18 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData dateStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist)); dateStats.setBitVectors(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)); data.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); + if (lhigh != null) { + timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh))); + } + if (llow != null) { + timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow))); + } + timestampStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist)); + timestampStats.setBitVectors(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)); + data.setTimestampStats(timestampStats); } } @@ -604,7 +660,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen)); data.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") - || colType.equals("tinyint") || 
colType.equals("timestamp")) { + || colType.equals("tinyint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); if (lhigh != null) { @@ -674,6 +730,41 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData estimation = Math.min(estimation, rangeBound); dateStats.setNumDVs(estimation); data.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); + if (lhigh != null) { + timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh))); + } + if (llow != null) { + timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow))); + } + long lowerBound = MetastoreDirectSqlUtils.extractSqlLong(dist); + long higherBound = MetastoreDirectSqlUtils.extractSqlLong(sumDist); + long rangeBound = Long.MAX_VALUE; + if (lhigh != null && llow != null) { + rangeBound = MetastoreDirectSqlUtils.extractSqlLong(lhigh) + - MetastoreDirectSqlUtils.extractSqlLong(llow) + 1; + } + long estimation; + if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null + && MetastoreDirectSqlUtils.extractSqlDouble(avgLong) != 0.0) { + // We have estimation, lowerbound and higherbound. We use estimation if + // it is between lowerbound and higherbound. + estimation = MetastoreDirectSqlUtils + .extractSqlLong((MetastoreDirectSqlUtils.extractSqlLong(lhigh) - MetastoreDirectSqlUtils + .extractSqlLong(llow)) / MetastoreDirectSqlUtils.extractSqlDouble(avgLong)); + if (estimation < lowerBound) { + estimation = lowerBound; + } else if (estimation > higherBound) { + estimation = higherBound; + } + } else { + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); + } + estimation = Math.min(estimation, rangeBound); + timestampStats.setNumDVs(estimation); + data.setTimestampStats(timestampStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); @@ -888,6 +979,26 @@ public static void setFieldsIntoOldStats(ColumnStatisticsObj oldStatObj, } break; } + case TIMESTAMP_STATS: { + TimestampColumnStatsData oldTimestampStatsData = oldStatObj.getStatsData().getTimestampStats(); + TimestampColumnStatsData newTimestampStatsData = newStatObj.getStatsData().getTimestampStats(); + if (newTimestampStatsData.isSetHighValue()) { + oldTimestampStatsData.setHighValue(newTimestampStatsData.getHighValue()); + } + if (newTimestampStatsData.isSetLowValue()) { + oldTimestampStatsData.setLowValue(newTimestampStatsData.getLowValue()); + } + if (newTimestampStatsData.isSetNumNulls()) { + oldTimestampStatsData.setNumNulls(newTimestampStatsData.getNumNulls()); + } + if (newTimestampStatsData.isSetNumDVs()) { + oldTimestampStatsData.setNumDVs(newTimestampStatsData.getNumDVs()); + } + if (newTimestampStatsData.isSetBitVectors()) { + oldTimestampStatsData.setBitVectors(newTimestampStatsData.getBitVectors()); + } + break; + } default: throw new IllegalArgumentException("Unknown stats type: " + typeNew.toString()); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java index 2d6d2261f7..dee750d99e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; /** @@ -48,6 +49,22 @@ public static DateColumnStatsDataInspector dateInspectorFromStats(ColumnStatisti return dateColumnStats; } + /** + * Converts to TimestampColumnStatsDataInspector if it's a TimestampColumnStatsData. + * @param cso ColumnStatisticsObj + * @return TimestampColumnStatsDataInspector + */ + public static TimestampColumnStatsDataInspector timestampInspectorFromStats(ColumnStatisticsObj cso) { + TimestampColumnStatsDataInspector timestampColumnStats; + if (cso.getStatsData().getTimestampStats() instanceof TimestampColumnStatsDataInspector) { + timestampColumnStats = + (TimestampColumnStatsDataInspector)(cso.getStatsData().getTimestampStats()); + } else { + timestampColumnStats = new TimestampColumnStatsDataInspector(cso.getStatsData().getTimestampStats()); + } + return timestampColumnStats; + } + + /** * Convertes to StringColumnStatsDataInspector * if it's a StringColumnStatsData. 
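The conversion helpers above and the StatObjectConverter changes earlier in this patch all funnel through the same representation: timestamp column statistics travel as whole seconds since the epoch, wrapped in the new Thrift Timestamp struct, and TimestampColumnStatsDataInspector is the metastore-side subclass that layers a cached NDV estimator over the generated TimestampColumnStatsData. A minimal usage sketch (illustrative only, not part of the patch; the class name and epoch values are invented) of populating the new timestampStats arm of the ColumnStatisticsData union through the generated Java API:

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.Timestamp;
import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;

public class TimestampStatsSketch {
  public static ColumnStatisticsData build() {
    // required fields first: numNulls and numDVs
    TimestampColumnStatsData ts = new TimestampColumnStatsData(10L, 1000L);
    // optional bounds, expressed as seconds since the epoch
    ts.setLowValue(new Timestamp(1546300800L));   // 2019-01-01 00:00:00 UTC
    ts.setHighValue(new Timestamp(1577836799L));  // 2019-12-31 23:59:59 UTC
    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setTimestampStats(ts);                   // selects field 8 of the union
    return data;
  }
}

Since ColumnStatisticsData is a Thrift union, at most one arm is set at a time; readers therefore guard with data.isSetTimestampStats() before calling data.getTimestampStats(), as the converter code above does.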
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java index 7aaab4a6b9..c2d14dfccc 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; public class ColumnStatsAggregatorFactory { @@ -48,6 +49,9 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, case DATE_STATS: agg = new DateColumnStatsAggregator(); break; + case TIMESTAMP_STATS: + agg = new TimestampColumnStatsAggregator(); + break; case DOUBLE_STATS: agg = new DoubleColumnStatsAggregator(); break; @@ -86,6 +90,10 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col csd.setDateStats(new DateColumnStatsDataInspector()); break; + case TIMESTAMP_STATS: + csd.setTimestampStats(new TimestampColumnStatsDataInspector()); + break; + case DOUBLE_STATS: csd.setDoubleStats(new DoubleColumnStatsDataInspector()); break; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java new file mode 100644 index 0000000000..c7c7bbee6e --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.aggr; + +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Timestamp; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.timestampInspectorFromStats; + +public class TimestampColumnStatsAggregator extends ColumnStatsAggregator implements + IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(TimestampColumnStatsAggregator.class); + + @Override + public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, + List<String> partNames, boolean areAllPartsFound) throws MetaException { + ColumnStatisticsObj statsObj = null; + String colType = null; + String colName = null; + // check if all the ColumnStatisticsObjs contain stats and all the ndv are + // bitvectors + boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size(); + NumDistinctValueEstimator ndvEstimator = null; + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + if (statsObj == null) { + colName = cso.getColName(); + colType = cso.getColType(); + statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, + cso.getStatsData().getSetField()); + LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + } + TimestampColumnStatsDataInspector timestampColumnStats = timestampInspectorFromStats(cso); + + if (timestampColumnStats.getNdvEstimator() == null) { + ndvEstimator = null; + break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = timestampColumnStats.getNdvEstimator(); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } + } + } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } + LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); + ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); + if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { + TimestampColumnStatsDataInspector aggregateData = null; + long lowerBound = 0; + long higherBound = 0; + double densityAvgSum = 0.0; + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(cso); + higherBound += newData.getNumDVs(); + densityAvgSum += (double) (diff(newData.getHighValue(), newData.getLowValue())) + / newData.getNumDVs(); + if (ndvEstimator != null) { +
ndvEstimator.mergeEstimators(newData.getNdvEstimator()); + } + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData + .setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } + } + if (ndvEstimator != null) { + // if all the ColumnStatisticsObjs contain bitvectors, we do not need to + // use uniform distribution assumption because we can merge bitvectors + // to get a good estimation. + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } else { + long estimation; + if (useDensityFunctionForNDVEstimation) { + // We have estimation, lowerbound and higherbound. We use estimation + // if it is between lowerbound and higherbound. + double densityAvg = densityAvgSum / partNames.size(); + estimation = (long) (diff(aggregateData.getHighValue(), aggregateData.getLowValue()) / densityAvg); + if (estimation < lowerBound) { + estimation = lowerBound; + } else if (estimation > higherBound) { + estimation = higherBound; + } + } else { + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); + } + aggregateData.setNumDVs(estimation); + } + columnStatisticsData.setTimestampStats(aggregateData); + } else { + // we need extrapolation + LOG.debug("start extrapolation for " + colName); + + Map<String, Integer> indexMap = new HashMap<>(); + for (int index = 0; index < partNames.size(); index++) { + indexMap.put(partNames.get(index), index); + } + Map<String, Double> adjustedIndexMap = new HashMap<>(); + Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>(); + // while we scan the column stats, we also get the densityAvg, lowerbound and + // higherbound when useDensityFunctionForNDVEstimation is true. + double densityAvgSum = 0.0; + if (ndvEstimator == null) { + // if not every partition uses bitvector for ndv, we just fall back to + // the traditional extrapolation methods. + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + String partName = csp.getPartName(); + TimestampColumnStatsData newData = cso.getStatsData().getTimestampStats(); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += (double) diff(newData.getHighValue(), newData.getLowValue()) / newData.getNumDVs(); + } + adjustedIndexMap.put(partName, (double) indexMap.get(partName)); + adjustedStatsMap.put(partName, cso.getStatsData()); + } + } else { + // we first merge all the adjacent bitvectors that we could merge and + // derive new partition names and index. + StringBuilder pseudoPartName = new StringBuilder(); + double pseudoIndexSum = 0; + int length = 0; + int curIndex = -1; + TimestampColumnStatsDataInspector aggregateData = null; + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + String partName = csp.getPartName(); + TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(cso); + // newData.isSetBitVectors() should be true for sure because we + // already checked it before. + if (indexMap.get(partName) != curIndex) { + // There is bitvector, but it is not adjacent to the previous ones.
+ if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setTimestampStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += (double) diff(aggregateData.getHighValue(), aggregateData.getLowValue()) + / aggregateData.getNumDVs(); + } + // reset everything + pseudoPartName = new StringBuilder(); + pseudoIndexSum = 0; + length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); + } + aggregateData = null; + } + curIndex = indexMap.get(partName); + pseudoPartName.append(partName); + pseudoIndexSum += curIndex; + length++; + curIndex++; + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + } + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); + } + if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setTimestampStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += (double) diff(aggregateData.getHighValue(), aggregateData.getLowValue()) + / aggregateData.getNumDVs(); + } + } + } + extrapolate(columnStatisticsData, partNames.size(), colStatsWithSourceInfo.size(), + adjustedIndexMap, adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); + } + LOG.debug( + "Ndv estimation for {} is {} # of partitions requested: {} # of partitions found: {}", + colName, columnStatisticsData.getTimestampStats().getNumDVs(), partNames.size(), + colStatsWithSourceInfo.size()); + statsObj.setStatsData(columnStatisticsData); + return statsObj; + } + + private long diff(Timestamp d1, Timestamp d2) { + return d1.getSecondsSinceEpoch() - d2.getSecondsSinceEpoch(); + } + + private Timestamp min(Timestamp d1, Timestamp d2) { + return d1.compareTo(d2) < 0 ? d1 : d2; + } + + private Timestamp max(Timestamp d1, Timestamp d2) { + return d1.compareTo(d2) < 0 ? 
+  private Timestamp max(Timestamp d1, Timestamp d2) {
+    return d1.compareTo(d2) < 0 ? d2 : d1;
+  }
+
+  @Override
+  public void extrapolate(ColumnStatisticsData extrapolateData, int numParts,
+      int numPartsWithStats, Map<String, Double> adjustedIndexMap,
+      Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
+    int rightBorderInd = numParts;
+    TimestampColumnStatsDataInspector extrapolateTimestampData = new TimestampColumnStatsDataInspector();
+    Map<String, TimestampColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
+    for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
+      extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getTimestampStats());
+    }
+    List<Map.Entry<String, TimestampColumnStatsData>> list = new LinkedList<>(
+        extractedAdjustedStatsMap.entrySet());
+    // get the lowValue
+    Collections.sort(list, new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, TimestampColumnStatsData> o1,
+          Map.Entry<String, TimestampColumnStatsData> o2) {
+        return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue());
+      }
+    });
+    double minInd = adjustedIndexMap.get(list.get(0).getKey());
+    double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+    long lowValue = 0;
+    long min = list.get(0).getValue().getLowValue().getSecondsSinceEpoch();
+    long max = list.get(list.size() - 1).getValue().getLowValue().getSecondsSinceEpoch();
+    if (minInd == maxInd) {
+      lowValue = min;
+    } else if (minInd < maxInd) {
+      // left border is the min
+      lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd));
+    } else {
+      // right border is the min
+      lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd));
+    }
+
+    // get the highValue
+    Collections.sort(list, new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, TimestampColumnStatsData> o1,
+          Map.Entry<String, TimestampColumnStatsData> o2) {
+        return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue());
+      }
+    });
+    minInd = adjustedIndexMap.get(list.get(0).getKey());
+    maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+    long highValue = 0;
+    min = list.get(0).getValue().getHighValue().getSecondsSinceEpoch();
+    max = list.get(list.size() - 1).getValue().getHighValue().getSecondsSinceEpoch();
+    if (minInd == maxInd) {
+      highValue = min;
+    } else if (minInd < maxInd) {
+      // right border is the max
+      highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+    } else {
+      // left border is the max
+      highValue = (long) (min + (max - min) * minInd / (minInd - maxInd));
+    }
+
+    // get the #nulls
+    long numNulls = 0;
+    for (Map.Entry<String, TimestampColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) {
+      numNulls += entry.getValue().getNumNulls();
+    }
+    // we scale up numNulls based on the number of partitions
+    numNulls = numNulls * numParts / numPartsWithStats;
+
+    // get the ndv
+    long ndv = 0;
+    Collections.sort(list, new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {
+      @Override
+      public int compare(Map.Entry<String, TimestampColumnStatsData> o1,
+          Map.Entry<String, TimestampColumnStatsData> o2) {
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
+      }
+    });
+    long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
+    long higherBound = 0;
+    for (Map.Entry<String, TimestampColumnStatsData> entry : list) {
+      higherBound += entry.getValue().getNumDVs();
+    }
+    if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
+      ndv = (long) ((highValue - lowValue) / densityAvg);
+      if (ndv < lowerBound) {
+        ndv = lowerBound;
+      } else if (ndv > higherBound) {
+        ndv = higherBound;
+      }
+    } else {
+      minInd = adjustedIndexMap.get(list.get(0).getKey());
+      maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey());
+      min = list.get(0).getValue().getNumDVs();
+      max = list.get(list.size() - 1).getValue().getNumDVs();
+      if (minInd == maxInd) {
+        ndv = min;
+      } else if (minInd < maxInd) {
+        // right border is the max
+        ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
+      } else {
+        // left border is the max
+        ndv = (long) (min + (max - min) * minInd / (minInd - maxInd));
+      }
+    }
+    extrapolateTimestampData.setLowValue(new Timestamp(lowValue));
+    extrapolateTimestampData.setHighValue(new Timestamp(highValue));
+    extrapolateTimestampData.setNumNulls(numNulls);
+    extrapolateTimestampData.setNumDVs(ndv);
+    extrapolateData.setTimestampStats(extrapolateTimestampData);
+  }
+}
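When no partition supplies bitvectors, the aggregator above falls back to a density-based NDV estimate clamped between a lower bound (the largest per-partition NDV) and a higher bound (the sum of per-partition NDVs). A minimal, self-contained sketch of that arithmetic follows; all names here are illustrative and not part of the patch.

// Minimal sketch of the density-based NDV fallback used by the aggregator.
// Hypothetical names; only the arithmetic mirrors the patch.
public final class NdvDensitySketch {
  static long estimateNdv(long lowSeconds, long highSeconds,
                          double densityAvg, long lowerBound, long higherBound) {
    // densityAvg is the mean of per-partition (high - low) / numDVs
    long estimation = (long) ((highSeconds - lowSeconds) / densityAvg);
    // clamp into [lowerBound, higherBound], exactly as the aggregator does
    return Math.min(Math.max(estimation, lowerBound), higherBound);
  }

  public static void main(String[] args) {
    // two days of data, one distinct timestamp per second on average
    System.out.println(estimateNdv(0L, 172_800L, 1.0, 100L, 200_000L)); // prints 172800
  }
}

The clamp matters because the raw range-over-density estimate can overshoot the sum of per-partition NDVs when partition ranges overlap heavily.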
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/TimestampColumnStatsDataInspector.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/TimestampColumnStatsDataInspector.java
new file mode 100644
index 0000000000..30b22b8292
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/TimestampColumnStatsDataInspector.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.cache;
+
+import java.nio.ByteBuffer;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;
+
+@SuppressWarnings("serial")
+public class TimestampColumnStatsDataInspector extends TimestampColumnStatsData {
+
+  private NumDistinctValueEstimator ndvEstimator;
+
+  public TimestampColumnStatsDataInspector() {
+    super();
+  }
+
+  public TimestampColumnStatsDataInspector(long numNulls, long numDVs) {
+    super(numNulls, numDVs);
+  }
+
+  public TimestampColumnStatsDataInspector(TimestampColumnStatsDataInspector other) {
+    super(other);
+    if (other.ndvEstimator != null) {
+      // serialize the source's estimator; our own field is still null here
+      super.setBitVectors(other.ndvEstimator.serialize());
+    }
+  }
+
+  public TimestampColumnStatsDataInspector(TimestampColumnStatsData other) {
+    super(other);
+  }
+
+  @Override
+  public TimestampColumnStatsDataInspector deepCopy() {
+    return new TimestampColumnStatsDataInspector(this);
+  }
+
+  @Override
+  public byte[] getBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.getBitVectors();
+  }
+
+  @Override
+  public ByteBuffer bufferForBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.bufferForBitVectors();
+  }
+
+  @Override
+  public void setBitVectors(byte[] bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void setBitVectors(ByteBuffer bitVectors) {
+    super.setBitVectors(bitVectors);
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public void unsetBitVectors() {
+    super.unsetBitVectors();
+    this.ndvEstimator = null;
+  }
+
+  @Override
+  public boolean isSetBitVectors() {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    return super.isSetBitVectors();
+  }
+
+  @Override
+  public void setBitVectorsIsSet(boolean value) {
+    if (ndvEstimator != null) {
+      updateBitVectors();
+    }
+    super.setBitVectorsIsSet(value);
+  }
+
+  public NumDistinctValueEstimator getNdvEstimator() {
+    if (isSetBitVectors() && getBitVectors().length != 0) {
+      updateNdvEstimator();
+    }
+    return ndvEstimator;
+  }
+
+  public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) {
+    super.unsetBitVectors();
+    this.ndvEstimator = ndvEstimator;
+  }
+
+  private void updateBitVectors() {
+    super.setBitVectors(ndvEstimator.serialize());
+    this.ndvEstimator = null;
+  }
+
+  private void updateNdvEstimator() {
+    this.ndvEstimator = NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(super.getBitVectors());
+    super.unsetBitVectors();
+  }
+
+}
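The inspector above maintains an either/or invariant: it holds serialized bit vectors or a live NumDistinctValueEstimator, never both at once, converting lazily on access so cached stats stay compact. A hedged usage sketch, assuming the imports from the file above; 'serializedSketch' stands in for a real serialized sketch.

// Assumed usage; 'serializedSketch' is a placeholder for real serialized bitvectors.
static long refreshNdv(byte[] serializedSketch) {
  TimestampColumnStatsDataInspector data = new TimestampColumnStatsDataInspector(0L, 0L);
  data.setBitVectors(serializedSketch);                   // drops any cached estimator
  NumDistinctValueEstimator est = data.getNdvEstimator(); // deserializes and unsets the bytes
  data.setNumDVs(est.estimateNumDistinctValues());        // refresh NDV from the sketch
  return data.getNumDVs();
}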
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
index 64d07c77b7..261437bcf1 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
 import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
 import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
+import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector;
 
 public class ColumnStatsMergerFactory {
@@ -69,6 +70,10 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb
       agg = new DateColumnStatsMerger();
       break;
     }
+    case TIMESTAMP_STATS: {
+      agg = new TimestampColumnStatsMerger();
+      break;
+    }
     default:
       throw new IllegalArgumentException("Unknown stats type " + statsObjNew.getStatsData().getSetField());
     }
@@ -109,6 +114,10 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col
       csd.setDateStats(new DateColumnStatsDataInspector());
       break;
 
+    case TIMESTAMP_STATS:
+      csd.setTimestampStats(new TimestampColumnStatsDataInspector());
+      break;
+
     default:
       throw new IllegalArgumentException("Unknown stats type");
     }
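With the two new switch arms, timestamp columns flow through the same merge path as the other stats types instead of hitting the default IllegalArgumentException. A hypothetical caller follows; the parameter order (new stats first) is an assumption read off the truncated hunk header, not confirmed by this patch.

// Hypothetical driver; parameter order is an assumption from the hunk header.
static void mergeInto(ColumnStatisticsObj aggregate, ColumnStatisticsObj fresh) {
  ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(fresh, aggregate);
  merger.merge(aggregate, fresh); // for timestamps, dispatches to TimestampColumnStatsMerger
}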
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
new file mode 100644
index 0000000000..efe6054333
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.columnstats.merge;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Timestamp;
+import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector;
+
+import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.timestampInspectorFromStats;
+
+public class TimestampColumnStatsMerger extends ColumnStatsMerger {
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    TimestampColumnStatsDataInspector aggregateData = timestampInspectorFromStats(aggregateColStats);
+    TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(newColStats);
+
+    Timestamp lowValue = min(aggregateData.getLowValue(), newData.getLowValue());
+    aggregateData.setLowValue(lowValue);
+    Timestamp highValue = max(aggregateData.getHighValue(), newData.getHighValue());
+    aggregateData.setHighValue(highValue);
+    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+    if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+      NumDistinctValueEstimator newEst = newData.getNdvEstimator();
+      long ndv = -1;
+      if (oldEst.canMerge(newEst)) {
+        oldEst.mergeEstimators(newEst);
+        ndv = oldEst.estimateNumDistinctValues();
+        aggregateData.setNdvEstimator(oldEst);
+      } else {
+        ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
+      }
+      LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+          + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+      aggregateData.setNumDVs(ndv);
+    }
+  }
+
+  private Timestamp min(Timestamp v1, Timestamp v2) {
+    if (v1 == null || v2 == null) {
+      return v1 != null ? v1 : v2;
+    }
+    return v1.compareTo(v2) < 0 ? v1 : v2;
+  }
+
+  private Timestamp max(Timestamp v1, Timestamp v2) {
+    if (v1 == null || v2 == null) {
+      return v1 != null ? v1 : v2;
+    }
+    return v1.compareTo(v2) > 0 ? v1 : v2;
+  }
+}
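The private min/max helpers treat a missing low or high value on either side as "no information", so a partition that never recorded a range cannot erase the other side's extremes. A standalone mirror of that logic, for illustration only; the class and method names are hypothetical.

// Standalone mirror of the merger's null-safe min (illustrative, not the patch itself).
import org.apache.hadoop.hive.metastore.api.Timestamp;

public final class NullSafeExtremes {
  static Timestamp minTs(Timestamp v1, Timestamp v2) {
    if (v1 == null) {
      return v2; // a missing side never wins
    }
    if (v2 == null) {
      return v1;
    }
    return v1.compareTo(v2) < 0 ? v1 : v2;
  }

  public static void main(String[] args) {
    Timestamp t = new Timestamp(1_600_000_000L); // seconds since epoch
    System.out.println(minTs(null, t).getSecondsSinceEpoch()); // 1600000000
  }
}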
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
index 27accc66da..102e0fdf26 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
@@ -224,6 +224,14 @@ public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long low
     this.longHighValue = highValue;
   }
 
+  public void setTimestampStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
+    this.numNulls = numNulls;
+    this.numDVs = numNDVs;
+    this.bitVector = bitVector;
+    this.longLowValue = lowValue;
+    this.longHighValue = highValue;
+  }
+
   public Long getLongLowValue() {
     return longLowValue;
   }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
index 81c3f8c1c3..2024bae366 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
@@ -214,6 +214,14 @@ public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long low
     this.longHighValue = highValue;
   }
 
+  public void setTimestampStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
+    this.numNulls = numNulls;
+    this.numDVs = numNDVs;
+    this.bitVector = bitVector;
+    this.longLowValue = lowValue;
+    this.longHighValue = highValue;
+  }
+
   public Long getLongLowValue() {
     return longLowValue;
   }
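As with DATE statistics (persisted as epoch days), the new setters reuse the generic longLowValue/longHighValue columns, storing timestamp extremes as seconds since the epoch. A hedged sketch of a writer populating the model; 'colStatsObj' and 'mStats' are illustrative locals, and only setTimestampStats comes from this patch.

// Illustrative only: map thrift timestamp stats onto the model's long columns.
TimestampColumnStatsData stats = colStatsObj.getStatsData().getTimestampStats();
mStats.setTimestampStats(
    stats.getNumNulls(), stats.getNumDVs(), stats.getBitVectors(),
    stats.getLowValue().getSecondsSinceEpoch(),   // assumes low/high are set
    stats.getHighValue().getSecondsSinceEpoch());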